diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..f8cfb4b --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,59 @@ +--- +name: Lint +on: + push: + paths: + - "**.go" + - go.mod + - go.sum + pull_request: + paths: + - "**.go" + - go.mod + - go.sum + +permissions: + contents: read + +jobs: + lint: + runs-on: ubuntu-latest + + steps: + - name: Set up Go + uses: actions/setup-go@v2 + with: + go-version: 1.17 + + - name: Check out code + uses: actions/checkout@v2 + + - name: Verify dependencies + run: | + go mod verify + go mod download + + LINT_VERSION=1.43.0 + curl -fsSL https://github.com/golangci/golangci-lint/releases/download/v${LINT_VERSION}/golangci-lint-${LINT_VERSION}-linux-amd64.tar.gz | \ + tar xz --strip-components 1 --wildcards \*/golangci-lint + mkdir -p bin && mv golangci-lint bin/ + + - name: Run checks + run: | + STATUS=0 + assert-nothing-changed() { + local diff + "$@" >/dev/null || return 1 + if ! diff="$(git diff -U1 --color --exit-code)"; then + printf '\e[31mError: running `\e[1m%s\e[22m` results in modifications that you must check into version control:\e[0m\n%s\n\n' "$*" "$diff" >&2 + git checkout -- . + STATUS=1 + fi + } + + assert-nothing-changed go fmt ./... + assert-nothing-changed go mod tidy + + bin/golangci-lint run --out-format=github-actions --timeout=3m || STATUS=$? 
+ + exit $STATUS diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..b35a733 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,35 @@ +--- +name: Release + +on: + push: + tags: + - "v*" + +permissions: + contents: write + +jobs: + lint: + name: Release + runs-on: ubuntu-latest + steps: + - name: Setup + uses: + actions/setup-go@v4 + with: + go-version: 1.21 + + - name: Checkout + uses: actions/checkout@v4 + + - name: Build releases + run: | + make releases VERSION=$GITHUB_REF_NAME + + - name: Release + uses: softprops/action-gh-release@v1 + with: + draft: true + files: | + releases/git-sizer-* diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..542f410 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,38 @@ +--- +on: [push, pull_request] +name: Test +permissions: + contents: read +jobs: + test: + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + fail-fast: false + runs-on: ${{ matrix.os }} + steps: + - name: Set up Go + uses: actions/setup-go@v6 + with: + go-version: '1.21.3' + + - name: Check out code + uses: actions/checkout@v2 + + - name: Get full repo history + run: git fetch --prune --unshallow --tags + + - name: Download dependencies + shell: bash + run: go mod download + + - name: Build + shell: bash + run: | + mkdir -p bin + go build -o bin . + ls -la bin + + - name: Test + shell: bash + run: go test -race -timeout 60s ./... diff --git a/.gitignore b/.gitignore index d66fcf8..9fb1b1b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /bin /releases +/vendor diff --git a/.golangci.toml b/.golangci.toml new file mode 100644 index 0000000..362ac4f --- /dev/null +++ b/.golangci.toml @@ -0,0 +1,443 @@ +[linters] + # This file is intended to be used by your IDE to show you what linting + # issues exist in the code as you work on it. 
The github actions will run + # only the Tier 1 linters against the whole codebase (see + # .golangci-repo.toml, but it should be the same as the tier 1 list here). + # The tier 2 and 3 linters will run only against the files you change in a + # PR, so that you can clean up as you go. + # + # To see what issues will be present on just the PR files, you can run + # golangci-lint run --new-from-rev=origin/main + + # format of this list: + # "lintername", # description + # reason it's enabled + enable = [ + # + # Full Repo Scan - Linters that find bugs. + # + + "bodyclose", # checks whether HTTP response body is closed successfully + # Forgetting to close an HTTP body can be a memory leak + "durationcheck", # check for two durations multiplied together + # this is probably a rare bug, but should have basically zero false positives. + "errcheck", # finds unchecked error returns + # Checking all errors is just good dev practice. + "errorlint", # finds code that will cause problems with the error wrapping scheme introduced in Go 1.13 + # This ensures you use errors.Is instead of == to compare errors, to avoid bugs with wrapping. + "exportloopref", # catch bugs resulting from referencing variables on range scope + # variables initialized in for loops change with each loop, which can cause bugs. + "forcetypeassert", # finds type asserts where you don't use the v, ok format + # if you do v := foo.(bar) and foo is not a bar, this will panic, and that's bad. + "gocritic", # Provides many diagnostics that check for bugs, performance and style issues. + # This is highly configurable, see the gocritic config section below. + "goerr113", # checks that you use errors.Is and don't define your own errors except as package variables. + # If you don't use errors.Is, then your code can break if someone wraps an error before they + # return it. 
Creating errors with errors.New("some message") makes a magic error that no one + # can handle, so either create it as a sentinel, or give it a type that people can check against. + "goimports", # check that all code is formatted with goimports + # Formatting is good. goimports is better (and formats imports slightly differently than gofmt). + "gosec", # Inspects source code for security problems + # high quality linter that finds real bugs + "govet", # reports suspicious constructs like printf calls that don't have the right # of arguments + # high quality, low false positives + "ineffassign", # Detects when assignments to existing variables are not used + # this finds bugs all the time, where you assign to a value but then never use + # the assigned value due to shadowing etc. + "nolintlint", # Reports ill-formed or insufficient nolint directives + # ensures that you don't typo nolint comments, and that you justify them with why you are ignoring a linter here. + "rowserrcheck", # checks whether Err of rows is checked successfully + # finds bugs in SQL code + "sqlclosecheck", # Checks that sql.Rows and sql.Stmt are closed. + # easy and finds bugs + "typecheck", # parses and type-checks Go code + # probably unnecessary, but shouldn't hurt anything + "wastedassign", # finds wasted assignment statements. + # can find bugs where you assign something but never use it + + # + # PR Scan - less critical, but should be fixed as we go along + # + + "deadcode", # Finds unused code + # dead code can be a bug or just confusing for the next dev + "depguard", # checks if package imports are in a list of acceptable packages + # this is useful for ensuring people use the company-standard packages for logging etc. + "errname", # Checks that sentinel errors are prefixed with the Err and error types are suffixed with the Error. + # This is standard practice and makes it easy to find error types and sentinels in the code. 
+ "gochecknoinits", # Checks that no init functions are present in Go code + # init is bad, and is almost never necessary, nor is it a good idea. + "godot", # Check if comments end in a period + # this is a recommended Go style, and not only makes your doc comments look more + # professional, it ensures that you don't stop a comment in the middle and forget + # to write the end of it. + #"godox", # detects use of FIXME, TODO and other comment keywords + # These should be issues in an issue tracker, not comments in the code. + "gosimple", # tells you where you can simplify your code + # simple is good + "makezero", # checks that you don't accidentally make a slice w/ nonzero length and then append to it + # this can cause bugs where you make a slice of length 5 and then append 5 items to it, + # giving you a length of 10 where the first 5 are all zero values. + "misspell", # Finds commonly misspelled English words in comments + # we all suck at spelling and tpying + "nakedret", # Finds naked returns in functions greater than a specified function length + # naked returns are evil + #"nestif", # Reports deeply nested if statements + # deeply nested ifs are hard to read + "nilerr", # Finds the code that returns nil even if it checks that the error is not nil. + # finds fairly common bug + "noctx", # noctx finds sending http request without context.Context + # you should always use context so we can cancel external requests + "prealloc", # Finds slice declarations that could potentially be preallocated + # this can save some memory and copying, otherwise append guesses how big to make slices and may need to + # copy all items in a slice to a bigger one. + "predeclared", # find code that shadows one of Go's predeclared identifiers + # you can make a variable called "true", but it's a bad idea. + #"revive", # finds common style mistakes + # style and other mistakes that you really should listen to. 
+ "staticcheck", # go vet on steroids, applying a ton of static analysis checks + # encompasses many linters in one, good stuff + "structcheck", # Finds unused struct fields + # can find bugs or trim unused fields to save memory + #"tparallel", # tparallel detects inappropriate usage of t.Parallel() + # likely a rare problem, but should have low false positives + "unconvert", # Remove unnecessary type conversions + # can save a little memory, unlikely to have false positives + "unused", # Checks for unused constants, variables, functions and types + # may have false positives, should watch this one + "varcheck", # Finds unused global variables and constants + # may have false positives, should watch this one + ] + + # we don't bother putting anything in disable, since we manually enable each linter. + # See the bottom of the file for disabled linters. + disable = [] + + +[run] + # options for analysis running + # Increase timeout from default 1m, first pre-cache run can take a bit in CI/CD + timeout = "5m" + + # default concurrency is the available CPU number + # concurrency = 4 + + # exit code when at least one issue was found, default is 1 + issues-exit-code = 1 + + # include test files or not, default is true + tests = true + + # list of build tags, all linters use it. Default is empty list. + build-tags = [] + + # which dirs to skip: issues from them won't be reported; + # can use regexp here: generated.*, regexp is applied on full path; + # default value is empty list, but default dirs are skipped independently + # from this option's value (see skip-dirs-use-default). + # "/" will be replaced by current OS file path separator to properly work + # on Windows. + skip-dirs = [] + + # default is true. Enables skipping of directories: + # vendor$, third_party$, testdata$, examples$, Godeps$, builtin$ + skip-dirs-use-default = true + + # which files to skip: they will be analyzed, but issues from them + # won't be reported. 
Default value is empty list, but there is + # no need to include all autogenerated files, we confidently recognize + # autogenerated files. If it's not please let us know. + # "/" will be replaced by current OS file path separator to properly work + # on Windows. + skip-files = [] + + # by default isn't set. If set we pass it to "go list -mod={option}". From "go help modules": + # If invoked with -mod=readonly, the go command is disallowed from the implicit + # automatic updating of go.mod described above. Instead, it fails when any changes + # to go.mod are needed. This setting is most useful to check that go.mod does + # not need updates, such as in a continuous integration and testing system. + # If invoked with -mod=vendor, the go command assumes that the vendor + # directory holds the correct copies of dependencies and ignores + # the dependency descriptions in go.mod. + modules-download-mode = "" + + # Allow multiple parallel golangci-lint instances running. + # If false (default) - golangci-lint acquires file lock on start. 
+ allow-parallel-runners = false + + +[output] + # colored-line-number|line-number|json|tab|checkstyle|code-climate|junit-xml|github-actions + # default is "colored-line-number" + format = "colored-line-number" + + # print lines of code with issue, default is true + print-issued-lines = true + + # print linter name in the end of issue text, default is true + print-linter-name = true + + # make issues output unique by line, default is true + uniq-by-line = true + + # add a prefix to the output file references; default is no prefix + path-prefix = "" + + # sorts results by: filepath, line and column + sort-results = true + + + +# options to enable differentiating between error and warning severities +[severity] + # GitHub Actions annotations support error and warning only: + # https://docs.github.com/en/free-pro-team@latest/actions/reference/workflow-commands-for-github-actions#setting-an-error-message + default-severity = "error" + + # If set to true severity-rules regular expressions become case sensitive. + # The default value is false. + case-sensitive = false + + # Default value is empty list. + # When a list of severity rules are provided, severity information will be added to lint + # issues. Severity rules have the same filtering capability as exclude rules except you + # are allowed to specify one matcher per severity rule. + # Only affects out formats that support setting severity information. + # [[severity.rules]] + # linters = [ + # "revive", + # ] + # severity = "warning" + +[issues] + # List of regexps of issue texts to exclude, empty list by default. + # Please document every exception here so we know what we're suppressing and why. + exclude = [ + # err113 doesn't like it when people use errors.New("abc"). + # That's kinda valid but also kind of a PITA if you don't actually want + # to define static errors everywhere, and no one actually depends on them. 
+ ".*do not define dynamic errors, use wrapped static errors instead.*" + ] + + # Maximum issues count per one linter. Set to 0 to disable. Default is 50. + max-issues-per-linter = 0 + + # Maximum count of issues with the same text. Set to 0 to disable. Default is 3. + max-same-issues = 0 + + # The default value is false. If set to true exclude and exclude-rules + # regular expressions become case sensitive. + # exclude-case-sensitive = false + + # This flag suppresses lint issues from several linters, overriding any other configuration you have set. + # It defaults to true. + # NEVER remove this configuration. If you want to suppress something, do so explicitly elsewhere. + exclude-use-default = false + + # The list of ids of default excludes to include or disable. By default it's empty. + # We shouldn't ever need this, since we turn off default excludes. + include = [] + + # Show only new issues: if there are unstaged changes or untracked files, + # only those changes are analyzed, else only changes in HEAD~ are analyzed. + # It's a super-useful option for integration of golangci-lint into existing + # large codebase. It's not practical to fix all existing issues at the moment + # of integration: much better don't allow issues in new code. + # Default is false. + new = false + + # Show only new issues created in git patch with set file path. + # new-from-patch = "path/to/patch/file" + + # Show only new issues created after git revision `REV` + # new-from-rev = "REV" + + # Fix found issues (if it's supported by the linter). Default is false. 
+ fix = false + + # reduce noise in some linters that don't necessarily need to be run in tests + [[issues.exclude-rules]] + path = "_test\\.go" + linters = ["errcheck", "gosec", "gocyclo", "noctx", "govet"] + +# +# Specific Linter Settings +# + +[linters-settings.depguard] + # ban some modules with replacements + list-type = "blacklist" + include-go-root = true + packages = [ + # we shouldn't use pkg/errors anymore + "github.com/pkg/errors", + ] + + [[linters-settings.depguard.packages-with-error-message]] + "github.com/pkg/errors" = "Please use stdlib errors module" + +[linters-settings.errcheck] + # report about not checking of errors in type assertions: `a := b.(MyStruct)`; + # default is false: such cases aren't reported by default. + check-type-assertions = true + + # report about assignment of errors to blank identifier: `num, _ := strconv.Atoi(numStr)`; + # default is false: such cases aren't reported by default. + check-blank = false + + # path to a file containing a list of functions to exclude from checking + # see https://github.com/kisielk/errcheck#excluding-functions for details + exclude = "" + + # list of functions to exclude from checking, where each entry is a single function to exclude. + # see https://github.com/kisielk/errcheck#excluding-functions for details + exclude-functions = [] + +[linters-settings.errorlint] + # Check whether fmt.Errorf uses the %w verb for formatting errors. See the readme for caveats + errorf = true + # Check for plain type assertions and type switches + asserts = false + # Check for plain error comparisons + comparison = false + +[linters-settings.gocritic] + # Enable multiple checks by tags, run `GL_DEBUG=gocritic golangci-lint run` to see all tags and checks. + # Empty list by default. See https://github.com/go-critic/go-critic#usage -> section "Tags". + enabled-tags = [ + "diagnostic", + "performance", + "style", + ] + disabled-checks = [ + # import shadow warns if a variable shadow the name of an imported package. 
+ # kind of noisy, doesn't actually hurt anything, just may be momentarily confusing. + "importShadow", + "preferStringWriter", + "paramTypeCombine", + "unnamedResult", + "emptyStringTest", + "elseif", + "whyNoLint", + ] + + # HugeParam: warn if passing huge parameters by value; consider passing pointers instead. + [linters-settings.gocritic.settings.hugeParam] + # increase threshold from default (80 bytes) to 256 bytes. + sizeThreshold = 256 + + + + +[linters-settings.goimports] + # Goimports checks whether code was formatted with goimports. + # uncomment if we want to enforce having GitHub-owned packages sorted into a separate section + #local-prefixes = "github.com/github/" + +[linters-settings.govet] + enable = [ "httpresponse" ] + +[linters-settings.gosec] + excludes = [ + "G301", # Expect directory permissions to be 0750 or less. See umask. + "G307", # deferring methods with errors. This duplicates errcheck, and I don't want to have to use two nolints. + ] + + +[linters-settings.nolintlint] + # adds some protections around nolint directives + + # Enable to ensure that nolint directives are all used. Default is true. + allow-unused = false + # Disable to ensure that nolint directives don't have a leading space. Default is true. + allow-leading-space = false + # Exclude following linters from requiring an explanation. Default is []. + allow-no-explanation = [] + # Enable to require an explanation of nonzero length after each nolint directive. Default is false. + require-explanation = false + # Enable to require nolint directives to mention the specific linter being suppressed. Default is false. + require-specific = true + + + + +# List of linters supported by golangci-lint that we intentionally do not use. +# Intentionally formatted the same as the "enabled" list, so you can just move one +# up to that list to enable it. 
+# list is in the form + # "name", # description + # reason to disable + + + # "asciicheck", # checks that your code does not contain non-ASCII identifiers + # Honestly not sure why anyone cares? + # "cyclop", # checks function and package cyclomatic complexity + # Too hard to know when you trip over this, and I feel like it needs a human + # to understand if a function is too complex. + # "dogsled", # Checks assignments with too many blank identifiers (e.g. x, _, _, _, := f()) + # This doesn't seem to be a common problem, nor a source of bugs. It would be + # better to have a linter that just tells you not to return 4 things in the + # first place. + # "dupl", # Tool for code clone detection + # This feels too likely to have high false positives on trivial code, and miss + # more complicated duplicates. + # "exhaustive", # checks exhaustiveness of enum switch statements + # This tends to hit a lot of false positives, and can lead to a lot of nolint statements. + # Definitely could be useful for specific repos of focused libraries where you know you + # update enums a lot, and want to make sure your switch statements stay up to date. + # "exhaustivestruct", # Checks if all struct's fields are initialized + # This is generally a feature, not a bug. Requiring a //nolint whenever you partially + # initialize a struct would be pretty annoying. + # "forbidigo", # Can be configured to forbids specific identifiers, like fmt.Printf, for example. + # This can actually be really useful, but needs a deep understanding of patterns + # we want devs to avoid in our specific repos. Definitely look into it if you have + # a list of "don't use XYZ" items. + # "funlen", # Tool for detection of long functions + # We could maybe put this in with a pretty big size limit, but it feels like it would be + # of limited benefit and cause grumbling. 
+ # "gci", # control golang package import order and make it always deterministic + # I haven't really had a problem with this, when using goimports, so I'm not sure it's useful. + # "gochecknoglobals", # check that no global variables exist + # this is actually good to have on, but I'm afraid it would cause more heartburn than good. + # "gocognit", # Computes and checks the cognitive complexity of functions + # Too hard to know when you trip over this, and I feel like it needs a human + # to understand if a function is too complex. + # "goconst", # Finds repeated strings that could be replaced by a constant + # magic strings are bad, but I feel like this could reduce adoption of the linter. + # "gofmt", # checks whether code was gofmt-ed. + # use goimports instead, they have slightly different formatting. + # "gofumpt", # checks whether code is gofumpt-ed + # use goimports instead, they have slightly different formatting. + # "goheader", # checks if file header matches a pattern + # useful for companies that mandate a copyright header on every file. That's not github. + # "golint", # unmaintained + # "gomnd", # an analyzer to detect magic numbers + # just too noisy + # "ifshort", # makes sure you use if err := foo(); err != nil + # this is really more personal preference, and sometimes can hinder readability. + # "importas", # enforces consistent import aliases + # this is kind of a special case for avoiding import collisions, and not really needed for us. + # "interfacer", # unmaintained + # "lll", # reports long lines + # duplicated by other checks + # "nlreturn", # nlreturn checks for a new line before return and branch statements to increase code clarity + # I'm not a monster, newline if you like, or not. + # "paralleltest", # paralleltest detects missing usage of t.Parallel() method in your Go test + # parallel tests are good, but packages are already run in parallel, so it's not a huge gain. 
+ # "promlinter", # Check Prometheus metrics naming via promlint + # enable if you use prometheus + # "scopelint", # unmaintained + # "tagliatelle", # Checks that struct tags match a certain format (camelcase, snakecase etc) + # likely to cause a lot of false positives if you're making tags for other people's APIs + # "testpackage", # makes you use a separate _test package + # I actually think this is a bad idea in general, and I would want a linter that does the opposite. + # "thelper", # detects golang test helpers without t.Helper() + # t.Helper is sometimes useful and sometimes not. + # "unparam", # Reports unused function parameters + # seems likely to have false positives + # "whitespace", # finds extra newlines at the beginning of functions and if statements + # I like this, but I feel like it would be too nitpicky for most people + # "wrapcheck", # Checks that errors returned from external packages are wrapped + # I mean, yeah, but you don't *always* need to wrap, that gets excesssive. + # "wsl", # Whitespace Linter - Forces you to use empty lines! + # meh, I'm not that much of a control freak diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..9b79bdd --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1 @@ +* @github/git-storage-reviewers diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bce8fc2..f4427e8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,7 +20,7 @@ Please note that this project is released with a [Contributor Code of Conduct][c Here are a few things you can do that will increase the likelihood of your pull request being accepted: -- Make sure that your code is formatted correctly: `make gofmt`. +- Make sure that your code is formatted correctly according to `go fmt`: `go fmt .`. - Write tests. - Keep your change as focused as possible. If there are multiple changes you would like to make that are not dependent upon each other, consider submitting them as separate pull requests. 
- Write a [good commit message](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html). diff --git a/Makefile b/Makefile index cf910f1..ff80eaf 100644 --- a/Makefile +++ b/Makefile @@ -3,30 +3,21 @@ GO111MODULES := 1 export GO111MODULES GO := $(CURDIR)/script/go -GOFMT := $(CURDIR)/script/gofmt GO_LDFLAGS := -X main.BuildVersion=$(shell git describe --tags --always --dirty || echo unknown) -GOFLAGS := -ldflags "$(GO_LDFLAGS)" +GOFLAGS := -mod=readonly -ldflags "$(GO_LDFLAGS)" ifdef USE_ISATTY GOFLAGS := $(GOFLAGS) --tags isatty endif -GO_SRCS := $(sort $(shell $(GO) list -f ' \ - {{$$ip := .Dir}} \ - {{range .GoFiles }}{{printf "%s/%s\n" $$ip .}}{{end}} \ - {{range .CgoFiles }}{{printf "%s/%s\n" $$ip .}}{{end}} \ - {{range .TestGoFiles }}{{printf "%s/%s\n" $$ip .}}{{end}} \ - {{range .XTestGoFiles}}{{printf "%s/%s\n" $$ip .}}{{end}} \ - ' ./...)) - .PHONY: all all: bin/git-sizer .PHONY: bin/git-sizer bin/git-sizer: mkdir -p bin - $(GO) build $(GOFLAGS) -o $@ $(PACKAGE) + $(GO) build $(GOFLAGS) -o $@ . # Cross-compile for a bunch of common platforms. Note that this # doesn't work with USE_ISATTY: @@ -50,7 +41,7 @@ define PLATFORM_template = .PHONY: bin/git-sizer-$(1)-$(2)$(3) bin/git-sizer-$(1)-$(2)$(3): mkdir -p bin - GOOS=$(1) GOARCH=$(2) $$(GO) build $$(GOFLAGS) -ldflags "-X main.ReleaseVersion=$$(VERSION)" -o $$@ $$(PACKAGE) + GOOS=$(1) GOARCH=$(2) $$(GO) build $$(GOFLAGS) -ldflags "-X main.ReleaseVersion=$$(VERSION)" -o $$@ . 
common-platforms: bin/git-sizer-$(1)-$(2)$(3) # Note that releases don't include code from vendor (they're only used @@ -72,8 +63,8 @@ endef $(eval $(call PLATFORM_template,linux,amd64)) $(eval $(call PLATFORM_template,linux,386)) -$(eval $(call PLATFORM_template,darwin,386)) $(eval $(call PLATFORM_template,darwin,amd64)) +$(eval $(call PLATFORM_template,darwin,arm64)) $(eval $(call PLATFORM_template,windows,amd64,.exe)) $(eval $(call PLATFORM_template,windows,386,.exe)) @@ -85,23 +76,6 @@ test: bin/git-sizer gotest gotest: $(GO) test -timeout 60s $(GOFLAGS) ./... -.PHONY: gofmt -gofmt: - $(GOFMT) -l -w $(GO_SRCS) | sed -e 's/^/Fixing /' - -.PHONY: goimports -goimports: - goimports -l -w -e $(GO_SRCS) - -.PHONY: govet -govet: - $(GO) vet ./... - .PHONY: clean clean: rm -rf bin - -# List all of this project's Go sources: -.PHONY: srcs -srcs: - @printf "%s\n" $(GO_SRCS) diff --git a/counts/counts.go b/counts/counts.go index 580dc7b..3961256 100644 --- a/counts/counts.go +++ b/counts/counts.go @@ -4,9 +4,11 @@ import ( "math" ) -// A count of something, capped at math.MaxUint32. +// Count32 is a count of something, capped at math.MaxUint32. type Count32 uint32 +// NewCount32 initializes a Count32 from a uint64, capped at +// math.MaxUint32. func NewCount32(n uint64) Count32 { if n > math.MaxUint32 { return Count32(math.MaxUint32) @@ -14,11 +16,13 @@ func NewCount32(n uint64) Count32 { return Count32(n) } +// ToUint64 returns the value of `n` as a `uint64`. If the value has +// overflowed, it returns `(math.MaxUint32, true)`. func (n Count32) ToUint64() (uint64, bool) { return uint64(n), n == math.MaxUint32 } -// Return the sum of two Count32s, capped at math.MaxUint32. +// Plus returns the sum of two Count32s, capped at math.MaxUint32. func (n1 Count32) Plus(n2 Count32) Count32 { n := n1 + n2 if n < n1 { @@ -28,7 +32,7 @@ func (n1 Count32) Plus(n2 Count32) Count32 { return n } -// Increment `*n1` by `n2`, capped at math.MaxUint32. 
+// Increment increases `*n1` by `n2`, capped at math.MaxUint32. func (n1 *Count32) Increment(n2 Count32) { *n1 = n1.Plus(n2) } @@ -36,37 +40,40 @@ func (n1 *Count32) Increment(n2 Count32) { // AdjustMaxIfNecessary adjusts `*n1` to be `max(*n1, n2)`. Return // true iff `n2` was greater than `*n1`. func (n1 *Count32) AdjustMaxIfNecessary(n2 Count32) bool { - if n2 > *n1 { - *n1 = n2 - return true - } else { + if n2 <= *n1 { return false } + + *n1 = n2 + return true } // AdjustMaxIfPossible adjusts `*n1` to be `max(*n1, n2)`. Return true // iff `n2` was greater than or equal to `*n1`. func (n1 *Count32) AdjustMaxIfPossible(n2 Count32) bool { - if n2 >= *n1 { - *n1 = n2 - return true - } else { + if n2 < *n1 { return false } + + *n1 = n2 + return true } -// A count of something, capped at math.MaxUint64. +// Count64 is a count of something, capped at math.MaxUint64. type Count64 uint64 +// NewCount64 initializes a Count64 from a uint64. func NewCount64(n uint64) Count64 { return Count64(n) } +// ToUint64 returns the value of `n` as a `uint64`. If the value has +// overflowed, it returns `(math.MaxUint64, true)`. func (n Count64) ToUint64() (uint64, bool) { return uint64(n), n == math.MaxUint64 } -// Return the sum of two Count64s, capped at math.MaxUint64. +// Plus returns the sum of two Count64s, capped at math.MaxUint64. func (n1 Count64) Plus(n2 Count64) Count64 { n := n1 + n2 if n < n1 { @@ -76,7 +83,7 @@ func (n1 Count64) Plus(n2 Count64) Count64 { return n } -// Increment `*n1` by `n2`, capped at math.MaxUint64. +// Increment increases `*n1` by `n2`, capped at math.MaxUint64. func (n1 *Count64) Increment(n2 Count64) { *n1 = n1.Plus(n2) } @@ -84,21 +91,21 @@ func (n1 *Count64) Increment(n2 Count64) { // AdjustMaxIfNecessary adjusts `*n1` to be `max(*n1, n2)`. Return // true iff `n2` was greater than `*n1`. 
func (n1 *Count64) AdjustMaxIfNecessary(n2 Count64) bool { - if n2 > *n1 { - *n1 = n2 - return true - } else { + if n2 <= *n1 { return false } + + *n1 = n2 + return true } // AdjustMaxIfPossible adjusts `*n1` to be `max(*n1, n2)`. Return true // iff `n2` was greater than or equal to `*n1`. func (n1 *Count64) AdjustMaxIfPossible(n2 Count64) bool { - if n2 > *n1 { - *n1 = n2 - return true - } else { + if n2 <= *n1 { return false } + + *n1 = n2 + return true } diff --git a/counts/human.go b/counts/human.go index cc69d50..ae75838 100644 --- a/counts/human.go +++ b/counts/human.go @@ -4,24 +4,28 @@ import ( "fmt" ) -// A quantity that can be made human-readable using Human(). +// Humanable is a quantity that can be made human-readable using +// `Humaner.Format()`. type Humanable interface { - // Return the value as a uint64, and a boolean telling whether it - // overflowed. + // ToUint64 returns the value as a uint64, and a boolean telling + // whether it overflowed. ToUint64() (uint64, bool) } -// An object that can format a Humanable in human-readable format. +// Humaner is an object that can format a Humanable in human-readable +// format. type Humaner struct { name string prefixes []Prefix } +// Prefix is a metric-like prefix that implies a scaling factor. type Prefix struct { Name string Multiplier uint64 } +// Metric is a Humaner representing metric prefixes. var Metric = Humaner{ name: "metric", prefixes: []Prefix{ @@ -34,6 +38,8 @@ var Metric = Humaner{ }, } +// Binary is a Humaner representing power-of-1024 based prefixes, +// typically used for bytes. var Binary = Humaner{ name: "binary", prefixes: []Prefix{ @@ -46,13 +52,15 @@ var Binary = Humaner{ }, } +// Name returns the name of `h` ("metric" or "binary"). func (h *Humaner) Name() string { return h.name } -// Format n, aligned, in `len(unit) + 10` or fewer characters (except -// for extremely large numbers). 
-func (h *Humaner) FormatNumber(n uint64, unit string) (string, string) { +// FormatNumber formats n, aligned, in `len(unit) + 10` or fewer +// characters (except for extremely large numbers). It returns strings +// representing the numeral and the unit string. +func (h *Humaner) FormatNumber(n uint64, unit string) (numeral string, unitString string) { prefix := h.prefixes[0] wholePart := n @@ -66,25 +74,28 @@ func (h *Humaner) FormatNumber(n uint64, unit string) (string, string) { if prefix.Multiplier == 1 { return fmt.Sprintf("%d", n), unit - } else { - mantissa := float64(n) / float64(prefix.Multiplier) - var format string + } - if wholePart >= 100 { - // `mantissa` can actually be up to 1023.999. - format = "%.0f" - } else if wholePart >= 10 { - format = "%.1f" - } else { - format = "%.2f" - } - return fmt.Sprintf(format, mantissa), prefix.Name + unit + mantissa := float64(n) / float64(prefix.Multiplier) + var format string + + switch { + case wholePart >= 100: + // `mantissa` can actually be up to 1023.999. + format = "%.0f" + case wholePart >= 10: + format = "%.1f" + default: + format = "%.2f" } + + return fmt.Sprintf(format, mantissa), prefix.Name + unit } -// Format values, aligned, in `len(unit) + 10` or fewer characters -// (except for extremely large numbers). -func (h *Humaner) Format(value Humanable, unit string) (string, string) { +// Format formats values, aligned, in `len(unit) + 10` or fewer +// characters (except for extremely large numbers). It returns strings +// representing the numeral and the unit string. +func (h *Humaner) Format(value Humanable, unit string) (numeral string, unitString string) { n, overflow := value.ToUint64() if overflow { return "∞", unit diff --git a/docs/BUILDING.md b/docs/BUILDING.md index d215c80..7f9fdef 100644 --- a/docs/BUILDING.md +++ b/docs/BUILDING.md @@ -7,11 +7,11 @@ Most people can just install a released version of `git-sizer`, [as described in 1. 
Make sure that you have a recent version of the [Go language toolchain](https://golang.org/doc/install) installed and that you have set `GOPATH`. -2. Get `git-sizer` using `go get`: +2. Get `git-sizer` using `go install`: - go get github.com/github/git-sizer + go install github.com/github/git-sizer@latest - This should fetch and compile the source code and write the executable file to `$GOPATH/bin/`. + This should install the executable file to `$GOPATH/bin/`. 3. Either add `$GOPATH/bin` to your `PATH`, or copy the executable file (`git-sizer` or `git-sizer.exe`) to a directory that is already in your `PATH`. @@ -25,7 +25,7 @@ This procedure is intended for experts and people who want to help develop `git- git clone https://github.com/github/git-sizer.git cd git-sizer -2. Install Go if necessary and create and prepare a project-local `GOPATH`: +2. Install Go if necessary: script/bootstrap diff --git a/git-sizer.go b/git-sizer.go index f84c42a..1ef9812 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -1,6 +1,7 @@ package main import ( + "context" "encoding/json" "errors" "fmt" @@ -8,68 +9,137 @@ import ( "os" "runtime/pprof" "strconv" + "time" + + "github.com/spf13/pflag" "github.com/github/git-sizer/git" + "github.com/github/git-sizer/internal/refopts" "github.com/github/git-sizer/isatty" + "github.com/github/git-sizer/meter" "github.com/github/git-sizer/sizes" - - "github.com/spf13/pflag" ) +const usage = `usage: git-sizer [OPTS] [ROOT...] + + Scan objects in your Git repository and emit statistics about them. + + --threshold THRESHOLD minimum level of concern (i.e., number of stars) + that should be reported. Default: + '--threshold=1'. Can be set via gitconfig: + 'sizer.threshold'. 
+ -v, --verbose report all statistics, whether concerning or + not; equivalent to '--threshold=0 + --no-verbose equivalent to '--threshold=1' + --critical only report critical statistics; equivalent + to '--threshold=30' + --names=[none|hash|full] display names of large objects in the specified + style. Values: + * 'none' - omit footnotes entirely + * 'hash' - show only the SHA-1s of objects + * 'full' - show full names + Default is '--names=full'. Can be set via + gitconfig: 'sizer.names'. + -j, --json output results in JSON format + --json-version=[1|2] choose which JSON format version to output. + Default: --json-version=1. Can be set via + gitconfig: 'sizer.jsonVersion'. + --[no-]progress report (don't report) progress to stderr. Can + be set via gitconfig: 'sizer.progress'. + --version only report the git-sizer version number + + Object selection: + + git-sizer traverses through your Git history to find objects to + process. By default, it processes all objects that are reachable from + any reference. You can tell it to process only some of your + references; see "Reference selection" below. + + If explicit ROOTs are specified on the command line, each one should + be a string that 'git rev-parse' can convert into a single Git object + ID, like 'main', 'main~:src', or an abbreviated SHA-1. See + git-rev-parse(1) for details. In that case, git-sizer also treats + those objects as starting points for its traversal, and also includes + the Git objects that are reachable from those roots in the analysis. + + As a special case, if one or more ROOTs are specified on the command + line but _no_ reference selection options, then _only_ the specified + ROOTs are traversed, and no references. + + Reference selection: + + The following options can be used to limit which references to + process. The last rule matching a reference determines whether that + reference is processed. 
+ + --[no-]branches process [don't process] branches + --[no-]tags process [don't process] tags + --[no-]remotes process [don't process] remote-tracking + references + --[no-]notes process [don't process] git-notes references + --[no-]stash process [don't process] refs/stash + --include PREFIX, --exclude PREFIX + process [don't process] references with the + specified PREFIX (e.g., + '--include=refs/remotes/origin') + --include /REGEXP/, --exclude /REGEXP/ + process [don't process] references matching the + specified regular expression (e.g., + '--include=refs/tags/release-.*') + --include @REFGROUP, --exclude @REFGROUP + process [don't process] references in the + specified reference group (see below) + --show-refs show which refs are being included/excluded + + PREFIX must match at a boundary; for example 'refs/foo' matches + 'refs/foo' and 'refs/foo/bar' but not 'refs/foobar'. + + REGEXP patterns must match the full reference name. + + REFGROUP can be the name of a predefined reference group ('branches', + 'tags', 'remotes', 'pulls', 'changes', 'notes', or 'stash'), or one + defined via gitconfig settings like the following (the + include/exclude settings can be repeated): + + * 'refgroup.REFGROUP.name=NAME' + * 'refgroup.REFGROUP.include=PREFIX' + * 'refgroup.REFGROUP.includeRegexp=REGEXP' + * 'refgroup.REFGROUP.exclude=PREFIX' + * 'refgroup.REFGROUP.excludeRegexp=REGEXP' + +` + var ReleaseVersion string var BuildVersion string -type NegatedBoolValue struct { - value *bool -} - -func (b *NegatedBoolValue) Set(s string) error { - v, err := strconv.ParseBool(s) - *b.value = !v - return err -} - -func (b *NegatedBoolValue) Get() interface{} { - return !*b.value -} - -func (b *NegatedBoolValue) String() string { - if b == nil || b.value == nil { - return "true" - } else { - return strconv.FormatBool(!*b.value) - } -} - -func (v *NegatedBoolValue) Type() string { - return "bool" -} - func main() { - err := mainImplementation() + ctx := context.Background() + + err 
:= mainImplementation(ctx, os.Stdout, os.Stderr, os.Args[1:]) if err != nil { fmt.Fprintf(os.Stderr, "error: %s\n", err) os.Exit(1) } } -func mainImplementation() error { - var processBranches bool - var processTags bool - var processRemotes bool +func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []string) error { var nameStyle sizes.NameStyle = sizes.NameStyleFull var cpuprofile string var jsonOutput bool - var jsonVersion uint + var jsonVersion int var threshold sizes.Threshold = 1 var progress bool var version bool + var showRefs bool - flags := pflag.NewFlagSet("", pflag.ContinueOnError) + // Try to open the repository, but it's not an error yet if this + // fails, because the user might only be asking for `--help`. + repo, repoErr := git.NewRepositoryFromPath(".") - flags.BoolVar(&processBranches, "branches", false, "process all branches") - flags.BoolVar(&processTags, "tags", false, "process all tags") - flags.BoolVar(&processRemotes, "remotes", false, "process all remote-tracking branches") + flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError) + flags.Usage = func() { + fmt.Fprint(stdout, usage) + } flags.VarP( sizes.NewThresholdFlagValue(&threshold, 0), @@ -77,6 +147,12 @@ func mainImplementation() error { ) flags.Lookup("verbose").NoOptDefVal = "true" + flags.Var( + sizes.NewThresholdFlagValue(&threshold, 1), + "no-verbose", "report statistics that are at all concerning", + ) + flags.Lookup("no-verbose").NoOptDefVal = "true" + flags.Var( &threshold, "threshold", "minimum level of concern (i.e., number of stars) that should be\n"+ @@ -98,83 +174,161 @@ func mainImplementation() error { ) flags.BoolVarP(&jsonOutput, "json", "j", false, "output results in JSON format") - flags.UintVar(&jsonVersion, "json-version", 1, "JSON format version to output (1 or 2)") + flags.IntVar(&jsonVersion, "json-version", 1, "JSON format version to output (1 or 2)") - atty, err := isatty.Isatty(os.Stderr.Fd()) - if err != nil { - atty = false 
+ defaultProgress := false + if f, ok := stderr.(*os.File); ok { + atty, err := isatty.Isatty(f.Fd()) + if err == nil && atty { + defaultProgress = true + } } - flags.BoolVar(&progress, "progress", atty, "report progress to stderr") + + flags.BoolVar(&progress, "progress", defaultProgress, "report progress to stderr") flags.BoolVar(&version, "version", false, "report the git-sizer version number") flags.Var(&NegatedBoolValue{&progress}, "no-progress", "suppress progress output") flags.Lookup("no-progress").NoOptDefVal = "true" flags.StringVar(&cpuprofile, "cpuprofile", "", "write cpu profile to file") - flags.MarkHidden("cpuprofile") + if err := flags.MarkHidden("cpuprofile"); err != nil { + return fmt.Errorf("marking option hidden: %w", err) + } - flags.SortFlags = false + var configger refopts.Configger + if repo != nil { + configger = repo + } - err = flags.Parse(os.Args[1:]) + rgb, err := refopts.NewRefGroupBuilder(configger) if err != nil { return err } - if jsonOutput && !(jsonVersion == 1 || jsonVersion == 2) { - return fmt.Errorf("JSON version must be 1 or 2") + rgb.AddRefopts(flags) + + flags.BoolVar(&showRefs, "show-refs", false, "list the references being processed") + + flags.SortFlags = false + + err = flags.Parse(args) + if err != nil { + if errors.Is(err, pflag.ErrHelp) { + return nil + } + return err } if cpuprofile != "" { f, err := os.Create(cpuprofile) if err != nil { - return fmt.Errorf("couldn't set up cpuprofile file: %s", err) + return fmt.Errorf("couldn't set up cpuprofile file: %w", err) + } + if err := pprof.StartCPUProfile(f); err != nil { + return fmt.Errorf("starting CPU profiling: %w", err) } - pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } if version { if ReleaseVersion != "" { - fmt.Printf("git-sizer release %s\n", ReleaseVersion) + fmt.Fprintf(stdout, "git-sizer release %s\n", ReleaseVersion) } else { - fmt.Printf("git-sizer build %s\n", BuildVersion) + fmt.Fprintf(stdout, "git-sizer build %s\n", BuildVersion) } return nil 
} - args := flags.Args() - - if len(args) != 0 { - return errors.New("excess arguments") + if repoErr != nil { + return fmt.Errorf("couldn't open Git repository: %w", repoErr) } - repo, err := git.NewRepository(".") - if err != nil { - return fmt.Errorf("couldn't open Git repository: %s", err) + if jsonOutput { + if !flags.Changed("json-version") { + v, err := repo.ConfigIntDefault("sizer.jsonVersion", jsonVersion) + if err != nil { + return err + } + jsonVersion = v + if !(jsonVersion == 1 || jsonVersion == 2) { + return fmt.Errorf("JSON version (read from gitconfig) must be 1 or 2") + } + } else if !(jsonVersion == 1 || jsonVersion == 2) { + return fmt.Errorf("JSON version must be 1 or 2") + } } - defer repo.Close() - var historySize sizes.HistorySize + if !flags.Changed("threshold") && + !flags.Changed("verbose") && + !flags.Changed("no-verbose") && + !flags.Changed("critical") { + s, err := repo.ConfigStringDefault("sizer.threshold", fmt.Sprintf("%g", threshold)) + if err != nil { + return err + } + v, err := strconv.ParseFloat(s, 64) + if err != nil { + return fmt.Errorf("parsing gitconfig value for 'sizer.threshold': %w", err) + } + threshold = sizes.Threshold(v) + } - var filter git.ReferenceFilter - if processBranches || processTags || processRemotes { - var filters []git.ReferenceFilter - if processBranches { - filters = append(filters, git.BranchesFilter) + if !flags.Changed("names") { + s, err := repo.ConfigStringDefault("sizer.names", "full") + if err != nil { + return err } - if processTags { - filters = append(filters, git.TagsFilter) + err = nameStyle.Set(s) + if err != nil { + return fmt.Errorf("parsing gitconfig value for 'sizer.names': %w", err) } - if processRemotes { - filters = append(filters, git.RemotesFilter) + } + + if !flags.Changed("progress") && !flags.Changed("no-progress") { + v, err := repo.ConfigBoolDefault("sizer.progress", progress) + if err != nil { + return fmt.Errorf("parsing gitconfig value for 'sizer.progress': %w", err) } - 
filter = git.OrFilter(filters...) - } else { - filter = git.AllReferencesFilter + progress = v } - historySize, err = sizes.ScanRepositoryUsingGraph(repo, filter, nameStyle, progress) + rg, err := rgb.Finish(len(flags.Args()) == 0) if err != nil { - return fmt.Errorf("error scanning repository: %s", err) + return err + } + + if showRefs { + fmt.Fprintf(stderr, "References (included references marked with '+'):\n") + rg = refopts.NewShowRefGrouper(rg, stderr) + } + + var progressMeter meter.Progress = meter.NoProgressMeter + if progress { + progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) + } + + refRoots, err := sizes.CollectReferences(ctx, repo, rg) + if err != nil { + return fmt.Errorf("determining which reference to scan: %w", err) + } + + roots := make([]sizes.Root, 0, len(refRoots)+len(flags.Args())) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + for _, arg := range flags.Args() { + oid, err := repo.ResolveObject(arg) + if err != nil { + return fmt.Errorf("resolving command-line argument %q: %w", arg, err) + } + roots = append(roots, sizes.NewExplicitRoot(arg, oid)) + } + + historySize, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, nameStyle, progressMeter, + ) + if err != nil { + return fmt.Errorf("error scanning repository: %w", err) } if jsonOutput { @@ -184,16 +338,20 @@ func mainImplementation() error { case 1: j, err = json.MarshalIndent(historySize, "", " ") case 2: - j, err = historySize.JSON(threshold, nameStyle) + j, err = historySize.JSON(rg.Groups(), threshold, nameStyle) default: return fmt.Errorf("JSON version must be 1 or 2") } if err != nil { - return fmt.Errorf("could not convert %v to json: %s", historySize, err) + return fmt.Errorf("could not convert %v to json: %w", historySize, err) } - fmt.Printf("%s\n", j) + fmt.Fprintf(stdout, "%s\n", j) } else { - io.WriteString(os.Stdout, historySize.TableString(threshold, nameStyle)) + if _, err := io.WriteString( + stdout, 
historySize.TableString(rg.Groups(), threshold, nameStyle), + ); err != nil { + return fmt.Errorf("writing output: %w", err) + } } return nil diff --git a/git/batch_header.go b/git/batch_header.go new file mode 100644 index 0000000..2500d4e --- /dev/null +++ b/git/batch_header.go @@ -0,0 +1,47 @@ +package git + +import ( + "fmt" + "strconv" + "strings" + + "github.com/github/git-sizer/counts" +) + +type BatchHeader struct { + OID OID + ObjectType ObjectType + ObjectSize counts.Count32 +} + +var missingHeader = BatchHeader{ + ObjectType: "missing", +} + +// Parse a `cat-file --batch[-check]` output header line (including +// the trailing LF). `spec`, if not "", is used in error messages. +func ParseBatchHeader(spec string, header string) (BatchHeader, error) { + header = header[:len(header)-1] + words := strings.Split(header, " ") + if words[len(words)-1] == "missing" { + if spec == "" { + spec = words[0] + } + return missingHeader, fmt.Errorf("missing object %s", spec) + } + + oid, err := NewOID(words[0]) + if err != nil { + return missingHeader, err + } + + size, err := strconv.ParseUint(words[2], 10, 0) + if err != nil { + return missingHeader, err + } + return BatchHeader{ + OID: oid, + ObjectType: ObjectType(words[1]), + ObjectSize: counts.NewCount32(size), + }, nil +} diff --git a/git/batch_obj_iter.go b/git/batch_obj_iter.go new file mode 100644 index 0000000..05c6928 --- /dev/null +++ b/git/batch_obj_iter.go @@ -0,0 +1,156 @@ +package git + +import ( + "bufio" + "context" + "fmt" + "io" + + "github.com/github/go-pipe/pipe" +) + +type ObjectRecord struct { + BatchHeader + Data []byte +} + +// BatchObjectIter iterates over objects whose names are fed into its +// stdin. The output is buffered, so it has to be closed before you +// can be sure that you have gotten all of the objects. 
+type BatchObjectIter struct { + ctx context.Context + p *pipe.Pipeline + oidCh chan OID + objCh chan ObjectRecord + errCh chan error +} + +// NewBatchObjectIter returns a `*BatchObjectIterator` and an +// `io.WriteCloser`. The iterator iterates over objects whose names +// are fed into the `io.WriteCloser`, one per line. The +// `io.WriteCloser` should normally be closed and the iterator's +// output drained before `Close()` is called. +func (repo *Repository) NewBatchObjectIter(ctx context.Context) (*BatchObjectIter, error) { + iter := BatchObjectIter{ + ctx: ctx, + p: pipe.New(), + oidCh: make(chan OID), + objCh: make(chan ObjectRecord), + errCh: make(chan error), + } + + iter.p.Add( + // Read OIDs from `iter.oidCh` and write them to `git + // cat-file`: + pipe.Function( + "request-objects", + func(ctx context.Context, _ pipe.Env, _ io.Reader, stdout io.Writer) error { + out := bufio.NewWriter(stdout) + + for { + select { + case oid, ok := <-iter.oidCh: + if !ok { + return out.Flush() + } + if _, err := fmt.Fprintln(out, oid.String()); err != nil { + return fmt.Errorf("writing to 'git cat-file': %w", err) + } + case <-ctx.Done(): + return ctx.Err() + } + } + }, + ), + + // Read OIDs from `stdin` and output a header line followed by + // the contents of the corresponding Git objects: + pipe.CommandStage( + "git-cat-file", + repo.GitCommand("cat-file", "--batch", "--buffer"), + ), + + // Parse the object headers and read the object contents, and + // shove both into `objCh`: + pipe.Function( + "object-reader", + func(ctx context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error { + defer close(iter.objCh) + + f := bufio.NewReader(stdin) + + for { + header, err := f.ReadString('\n') + if err != nil { + if err == io.EOF { + return nil + } + return fmt.Errorf("reading from 'git cat-file': %w", err) + } + batchHeader, err := ParseBatchHeader("", header) + if err != nil { + return fmt.Errorf("parsing output of 'git cat-file': %w", err) + } + + // Read the object 
contents plus the trailing LF + // (which is discarded below while creating the + // `ObjectRecord`): + data := make([]byte, batchHeader.ObjectSize+1) + if _, err := io.ReadFull(f, data); err != nil { + return fmt.Errorf( + "reading object data from 'git cat-file' for %s '%s': %w", + batchHeader.ObjectType, batchHeader.OID, err, + ) + } + + select { + case iter.objCh <- ObjectRecord{ + BatchHeader: batchHeader, + Data: data[:batchHeader.ObjectSize], + }: + case <-iter.ctx.Done(): + return iter.ctx.Err() + } + } + }, + ), + ) + + if err := iter.p.Start(ctx); err != nil { + return nil, err + } + + return &iter, nil +} + +// RequestObject requests that the object with the specified `oid` be +// processed. The objects registered via this method can be read using +// `Next()` in the order that they were requested. +func (iter *BatchObjectIter) RequestObject(oid OID) error { + select { + case iter.oidCh <- oid: + return nil + case <-iter.ctx.Done(): + return iter.ctx.Err() + } +} + +// Close closes the iterator and frees up resources. Close must be +// called exactly once. +func (iter *BatchObjectIter) Close() { + close(iter.oidCh) +} + +// Next either returns the next object (its header and contents), or a +// `false` boolean value if no more objects are left. Objects need to +// be read asynchronously, but the last objects won't necessarily show +// up here until `Close()` has been called. +func (iter *BatchObjectIter) Next() (ObjectRecord, bool, error) { + obj, ok := <-iter.objCh + if !ok { + return ObjectRecord{ + BatchHeader: missingHeader, + }, false, iter.p.Wait() + } + return obj, true, nil +} diff --git a/git/commit.go b/git/commit.go new file mode 100644 index 0000000..5e46ee7 --- /dev/null +++ b/git/commit.go @@ -0,0 +1,57 @@ +package git + +import ( + "fmt" + + "github.com/github/git-sizer/counts" +) + +// Commit represents the parts of a commit object that we need. 
+type Commit struct { + Size counts.Count32 + Parents []OID + Tree OID +} + +// ParseCommit parses the commit object whose contents are in `data`. +// `oid` is used only in error messages. +func ParseCommit(oid OID, data []byte) (*Commit, error) { + var parents []OID + var tree OID + var treeFound bool + iter, err := NewObjectHeaderIter(oid.String(), data) + if err != nil { + return nil, err + } + for iter.HasNext() { + key, value, err := iter.Next() + if err != nil { + return nil, err + } + switch key { + case "parent": + parent, err := NewOID(value) + if err != nil { + return nil, fmt.Errorf("malformed parent header in commit %s", oid) + } + parents = append(parents, parent) + case "tree": + if treeFound { + return nil, fmt.Errorf("multiple trees found in commit %s", oid) + } + tree, err = NewOID(value) + if err != nil { + return nil, fmt.Errorf("malformed tree header in commit %s", oid) + } + treeFound = true + } + } + if !treeFound { + return nil, fmt.Errorf("no tree found in commit %s", oid) + } + return &Commit{ + Size: counts.NewCount32(uint64(len(data))), + Parents: parents, + Tree: tree, + }, nil +} diff --git a/git/git.go b/git/git.go index 7883cf9..ef3cbc6 100644 --- a/git/git.go +++ b/git/git.go @@ -1,93 +1,106 @@ package git import ( - "bufio" "bytes" - "encoding/hex" "errors" "fmt" - "io" - "io/ioutil" + "io/fs" "os" "os/exec" "path/filepath" - "strconv" - "strings" - - "github.com/github/git-sizer/counts" ) -// The type of an object ("blob", "tree", "commit", "tag", "missing"). +// ObjectType represents the type of a Git object ("blob", "tree", +// "commit", "tag", or "missing"). type ObjectType string -type OID struct { - v [20]byte -} - -var NullOID OID - -func OIDFromBytes(oidBytes []byte) (OID, error) { - var oid OID - if len(oidBytes) != len(oid.v) { - return OID{}, errors.New("bytes oid has the wrong length") +// Repository represents a Git repository on disk. 
+type Repository struct { + // gitDir is the path to the `GIT_DIR` for this repository. It + // might be absolute or it might be relative to the current + // directory. + gitDir string + + // gitBin is the path of the `git` executable that should be used + // when running commands in this repository. + gitBin string + // hashAgo is repository hash algo + hashAlgo HashAlgo +} + +// smartJoin returns `relPath` if it is an absolute path. If not, it +// assumes that `relPath` is relative to `path`, so it joins them +// together and returns the result. In that case, if `path` itself is +// relative, then the return value is also relative. +func smartJoin(path, relPath string) string { + if filepath.IsAbs(relPath) { + return relPath } - copy(oid.v[0:20], oidBytes) - return oid, nil + return filepath.Join(path, relPath) } -func NewOID(s string) (OID, error) { - oidBytes, err := hex.DecodeString(s) +// NewRepositoryFromGitDir creates a new `Repository` object that can +// be used for running `git` commands, given the value of `GIT_DIR` +// for the repository. 
+func NewRepositoryFromGitDir(gitDir string) (*Repository, error) { + // Find the `git` executable to be used: + gitBin, err := findGitBin() if err != nil { - return OID{}, err + return nil, fmt.Errorf( + "could not find 'git' executable (is it in your PATH?): %w", err, + ) } - return OIDFromBytes(oidBytes) -} -func (oid OID) String() string { - return hex.EncodeToString(oid.v[:]) -} + hashAlgo := HashSHA1 + cmd := exec.Command(gitBin, "--git-dir", gitDir, "rev-parse", "--show-object-format") //nolint:gosec + if out, err := cmd.Output(); err == nil { + if string(bytes.TrimSpace(out)) == "sha256" { + hashAlgo = HashSHA256 + } + } -func (oid OID) Bytes() []byte { - return oid.v[:] -} + repo := Repository{ + gitDir: gitDir, + gitBin: gitBin, + hashAlgo: hashAlgo, + } -func (oid OID) MarshalJSON() ([]byte, error) { - src := oid.v[:] - dst := make([]byte, hex.EncodedLen(len(src))+2) - dst[0] = '"' - dst[len(dst)-1] = '"' - hex.Encode(dst[1:len(dst)-1], src) - return dst, nil -} + full, err := repo.IsFull() + if err != nil { + return nil, fmt.Errorf("determining whether the repository is a full clone: %w", err) + } + if !full { + return nil, errors.New("this appears to be a shallow clone; full clone required") + } -type Repository struct { - path string + return &repo, nil } -// smartJoin returns the path that can be described as `relPath` -// relative to `path`, given that `path` is either absolute or is -// relative to the current directory. -func smartJoin(path, relPath string) string { - if filepath.IsAbs(relPath) { - return relPath +// NewRepositoryFromPath creates a new `Repository` object that can be +// used for running `git` commands within `path`. It does so by asking +// `git` what `GIT_DIR` to use. Git, in turn, bases its decision on +// the path and the environment. 
+func NewRepositoryFromPath(path string) (*Repository, error) { + gitBin, err := findGitBin() + if err != nil { + return nil, fmt.Errorf( + "could not find 'git' executable (is it in your PATH?): %w", err, + ) } - return filepath.Join(path, relPath) -} -func NewRepository(path string) (*Repository, error) { - cmd := exec.Command("git", "-C", path, "rev-parse", "--git-dir") + //nolint:gosec // `gitBin` is chosen carefully, and `path` is the + // path to the repository. + cmd := exec.Command(gitBin, "-C", path, "rev-parse", "--git-dir") out, err := cmd.Output() if err != nil { switch err := err.(type) { case *exec.Error: return nil, fmt.Errorf( - "could not run git (is it in your PATH?): %s", - err.Err, + "could not run '%s': %w", gitBin, err.Err, ) case *exec.ExitError: return nil, fmt.Errorf( - "git rev-parse failed: %s", - err.Stderr, + "git rev-parse failed: %s", err.Stderr, ) default: return nil, err @@ -95,24 +108,31 @@ func NewRepository(path string) (*Repository, error) { } gitDir := smartJoin(path, string(bytes.TrimSpace(out))) - cmd = exec.Command("git", "rev-parse", "--git-path", "shallow") - cmd.Dir = gitDir - out, err = cmd.Output() + return NewRepositoryFromGitDir(gitDir) +} + +// IsFull returns `true` iff `repo` appears to be a full clone. +func (repo *Repository) IsFull() (bool, error) { + shallow, err := repo.GitPath("shallow") if err != nil { - return nil, fmt.Errorf( - "could not run 'git rev-parse --git-path shallow': %s", err, - ) + return false, err } - shallow := smartJoin(gitDir, string(bytes.TrimSpace(out))) + _, err = os.Lstat(shallow) if err == nil { - return nil, errors.New("this appears to be a shallow clone; full clone required") + return false, nil + } + + if !errors.Is(err, fs.ErrNotExist) { + return false, err } - return &Repository{path: gitDir}, nil + // The `shallow` file is absent, which is what we expect + // for a full clone. 
+ return true, nil } -func (repo *Repository) gitCommand(callerArgs ...string) *exec.Cmd { +func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { args := []string{ // Disable replace references when running our commands: "--no-replace-objects", @@ -125,11 +145,13 @@ func (repo *Repository) gitCommand(callerArgs ...string) *exec.Cmd { args = append(args, callerArgs...) - cmd := exec.Command("git", args...) + //nolint:gosec // `gitBin` is chosen carefully, and the rest of + // the args have been checked. + cmd := exec.Command(repo.gitBin, args...) cmd.Env = append( os.Environ(), - "GIT_DIR="+repo.path, + "GIT_DIR="+repo.gitDir, // Disable grafts when running our commands: "GIT_GRAFT_FILE="+os.DevNull, ) @@ -137,640 +159,37 @@ func (repo *Repository) gitCommand(callerArgs ...string) *exec.Cmd { return cmd } -func (repo *Repository) Path() string { - return repo.path -} - -func (repo *Repository) Close() error { - return nil -} - -type Reference struct { - Refname string - ObjectType ObjectType - ObjectSize counts.Count32 - OID OID -} - -type ReferenceIter struct { - cmd *exec.Cmd - out io.ReadCloser - f *bufio.Reader - errChan <-chan error -} - -// NewReferenceIter returns an iterator that iterates over all of the -// references in `repo`. -func (repo *Repository) NewReferenceIter() (*ReferenceIter, error) { - cmd := repo.gitCommand( - "for-each-ref", "--format=%(objectname) %(objecttype) %(objectsize) %(refname)", - ) - - out, err := cmd.StdoutPipe() - if err != nil { - return nil, err - } - - cmd.Stderr = os.Stderr - - err = cmd.Start() - if err != nil { - return nil, err - } - - return &ReferenceIter{ - cmd: cmd, - out: out, - f: bufio.NewReader(out), - errChan: make(chan error, 1), - }, nil +// GitDir returns the path to `repo`'s `GIT_DIR`. It might be absolute +// or it might be relative to the current directory. 
+func (repo *Repository) GitDir() string { + return repo.gitDir } -func (iter *ReferenceIter) Next() (Reference, bool, error) { - line, err := iter.f.ReadString('\n') - if err != nil { - if err != io.EOF { - return Reference{}, false, err - } - return Reference{}, false, nil - } - line = line[:len(line)-1] - words := strings.Split(line, " ") - if len(words) != 4 { - return Reference{}, false, fmt.Errorf("line improperly formatted: %#v", line) - } - oid, err := NewOID(words[0]) - if err != nil { - return Reference{}, false, fmt.Errorf("SHA-1 improperly formatted: %#v", words[0]) - } - objectType := ObjectType(words[1]) - objectSize, err := strconv.ParseUint(words[2], 10, 32) - if err != nil { - return Reference{}, false, fmt.Errorf("object size improperly formatted: %#v", words[2]) - } - refname := words[3] - return Reference{ - Refname: refname, - ObjectType: objectType, - ObjectSize: counts.Count32(objectSize), - OID: oid, - }, true, nil -} - -func (l *ReferenceIter) Close() error { - err := l.out.Close() - err2 := l.cmd.Wait() - if err == nil { - err = err2 - } - return err -} - -type BatchObjectIter struct { - cmd *exec.Cmd - out io.ReadCloser - f *bufio.Reader -} - -// NewBatchObjectIter returns iterates over objects whose names are -// fed into its stdin. The output is buffered, so it has to be closed -// before you can be sure to read all of the objects. 
-func (repo *Repository) NewBatchObjectIter() (*BatchObjectIter, io.WriteCloser, error) { - cmd := repo.gitCommand("cat-file", "--batch", "--buffer") - - in, err := cmd.StdinPipe() - if err != nil { - return nil, nil, err - } - - out, err := cmd.StdoutPipe() - if err != nil { - return nil, nil, err - } - - cmd.Stderr = os.Stderr - - err = cmd.Start() - if err != nil { - return nil, nil, err - } - - return &BatchObjectIter{ - cmd: cmd, - out: out, - f: bufio.NewReader(out), - }, in, nil -} - -func (iter *BatchObjectIter) Next() (OID, ObjectType, counts.Count32, []byte, error) { - header, err := iter.f.ReadString('\n') - if err != nil { - return OID{}, "", 0, nil, err - } - oid, objectType, objectSize, err := parseBatchHeader("", header) - if err != nil { - return OID{}, "", 0, nil, err - } - // +1 for LF: - data := make([]byte, objectSize+1) - _, err = io.ReadFull(iter.f, data) - if err != nil { - return OID{}, "", 0, nil, err - } - data = data[:len(data)-1] - return oid, objectType, objectSize, data, nil -} - -func (l *BatchObjectIter) Close() error { - err := l.out.Close() - err2 := l.cmd.Wait() - if err == nil { - err = err2 - } - return err -} - -type ReferenceFilter func(Reference) bool - -func AllReferencesFilter(_ Reference) bool { - return true -} - -func PrefixFilter(prefix string) ReferenceFilter { - return func(r Reference) bool { - return strings.HasPrefix(r.Refname, prefix) - } -} - -var ( - BranchesFilter ReferenceFilter = PrefixFilter("refs/heads/") - TagsFilter ReferenceFilter = PrefixFilter("refs/tags/") - RemotesFilter ReferenceFilter = PrefixFilter("refs/remotes/") -) - -func notNilFilters(filters ...ReferenceFilter) []ReferenceFilter { - var ret []ReferenceFilter - for _, filter := range filters { - if filter != nil { - ret = append(ret, filter) - } - } - return ret -} - -func OrFilter(filters ...ReferenceFilter) ReferenceFilter { - filters = notNilFilters(filters...) 
- if len(filters) == 0 { - return AllReferencesFilter - } else if len(filters) == 1 { - return filters[0] - } else { - return func(r Reference) bool { - for _, filter := range filters { - if filter(r) { - return true - } - } - return false - } - } -} - -func AndFilter(filters ...ReferenceFilter) ReferenceFilter { - filters = notNilFilters(filters...) - if len(filters) == 0 { - return AllReferencesFilter - } else if len(filters) == 1 { - return filters[0] - } else { - return func(r Reference) bool { - for _, filter := range filters { - if !filter(r) { - return false - } - } - return true - } - } -} - -func NotFilter(filter ReferenceFilter) ReferenceFilter { - return func(r Reference) bool { - return !filter(r) - } -} - -// Parse a `cat-file --batch[-check]` output header line (including -// the trailing LF). `spec`, if not "", is used in error messages. -func parseBatchHeader(spec string, header string) (OID, ObjectType, counts.Count32, error) { - header = header[:len(header)-1] - words := strings.Split(header, " ") - if words[len(words)-1] == "missing" { - if spec == "" { - spec = words[0] - } - return OID{}, "missing", 0, fmt.Errorf("missing object %s", spec) - } - - oid, err := NewOID(words[0]) - if err != nil { - return OID{}, "missing", 0, err - } - - size, err := strconv.ParseUint(words[2], 10, 0) - if err != nil { - return OID{}, "missing", 0, err - } - return oid, ObjectType(words[1]), counts.NewCount32(size), nil -} - -type ObjectIter struct { - cmd1 *exec.Cmd - cmd2 *exec.Cmd - in1 io.Writer - out1 io.ReadCloser - out2 io.ReadCloser - f *bufio.Reader - errChan <-chan error -} - -// NewObjectIter returns an iterator that iterates over objects in -// `repo`. The second return value is the stdin of the `rev-list` -// command. The caller can feed values into it but must close it in -// any case. 
-func (repo *Repository) NewObjectIter(args ...string) ( - *ObjectIter, io.WriteCloser, error, -) { - cmd1 := repo.gitCommand(append([]string{"rev-list", "--objects"}, args...)...) - in1, err := cmd1.StdinPipe() - if err != nil { - return nil, nil, err - } - - out1, err := cmd1.StdoutPipe() - if err != nil { - return nil, nil, err - } - - cmd1.Stderr = os.Stderr - - err = cmd1.Start() - if err != nil { - return nil, nil, err - } - - cmd2 := repo.gitCommand("cat-file", "--batch-check", "--buffer") - in2, err := cmd2.StdinPipe() - if err != nil { - out1.Close() - cmd1.Wait() - return nil, nil, err - } - - out2, err := cmd2.StdoutPipe() - if err != nil { - in2.Close() - out1.Close() - cmd1.Wait() - return nil, nil, err - } - - cmd2.Stderr = os.Stderr - - err = cmd2.Start() - if err != nil { - return nil, nil, err - } - - errChan := make(chan error, 1) - - go func() { - defer in2.Close() - f1 := bufio.NewReader(out1) - f2 := bufio.NewWriter(in2) - defer f2.Flush() - for { - line, err := f1.ReadString('\n') - if err != nil { - if err != io.EOF { - errChan <- err - } else { - errChan <- nil - } - return - } - if len(line) <= 40 { - errChan <- fmt.Errorf("line too short: %#v", line) - } - f2.WriteString(line[:40]) - f2.WriteByte('\n') - } - }() - - return &ObjectIter{ - cmd1: cmd1, - cmd2: cmd2, - out1: out1, - out2: out2, - f: bufio.NewReader(out2), - errChan: errChan, - }, in1, nil -} - -// CreateObject creates a new Git object, of the specified type, in -// `Repository`. `writer` is a function that writes the object in `git -// hash-object` input format. This is used for testing only. 
-func (repo *Repository) CreateObject(t ObjectType, writer func(io.Writer) error) (OID, error) { - cmd := repo.gitCommand("hash-object", "-w", "-t", string(t), "--stdin") - in, err := cmd.StdinPipe() - if err != nil { - return OID{}, err - } - - out, err := cmd.StdoutPipe() - if err != nil { - return OID{}, err - } - - cmd.Stderr = os.Stderr - - err = cmd.Start() - if err != nil { - return OID{}, err - } - - err = writer(in) - err2 := in.Close() - if err != nil { - cmd.Wait() - return OID{}, err - } - if err2 != nil { - cmd.Wait() - return OID{}, err2 - } - - output, err := ioutil.ReadAll(out) - err2 = cmd.Wait() - if err != nil { - return OID{}, err - } - if err2 != nil { - return OID{}, err2 - } - - return NewOID(string(bytes.TrimSpace(output))) -} - -func (repo *Repository) UpdateRef(refname string, oid OID) error { - var cmd *exec.Cmd - - if oid == NullOID { - cmd = repo.gitCommand("update-ref", "-d", refname) - } else { - cmd = repo.gitCommand("update-ref", refname, oid.String()) - } - return cmd.Run() -} - -// Next returns the next object, or EOF when done. -func (l *ObjectIter) Next() (OID, ObjectType, counts.Count32, error) { - line, err := l.f.ReadString('\n') - if err != nil { - return OID{}, "", 0, err - } - - return parseBatchHeader("", line) -} - -func (l *ObjectIter) Close() error { - l.out1.Close() - err := <-l.errChan - l.out2.Close() - err2 := l.cmd1.Wait() - if err == nil { - err = err2 - } - err2 = l.cmd2.Wait() - if err == nil { - err = err2 - } - return err -} - -type ObjectHeaderIter struct { - name string - data string -} - -// Iterate over a commit or tag object header. `data` should be the -// object's contents, which is usually terminated by a blank line that -// separates the header from the comment. However, annotated tags -// don't always include comments, and Git even tolerates commits -// without comments, so don't insist on a blank line. `name` is used -// in error messages. 
-func NewObjectHeaderIter(name string, data []byte) (ObjectHeaderIter, error) { - headerEnd := bytes.Index(data, []byte("\n\n")) - if headerEnd == -1 { - if len(data) == 0 { - return ObjectHeaderIter{}, fmt.Errorf("%s has zero length", name) - } - - if data[len(data)-1] != '\n' { - return ObjectHeaderIter{}, fmt.Errorf("%s has no terminating LF", name) - } - - return ObjectHeaderIter{name, string(data)}, nil - } - return ObjectHeaderIter{name, string(data[:headerEnd+1])}, nil -} - -func (iter *ObjectHeaderIter) HasNext() bool { - return len(iter.data) > 0 -} - -func (iter *ObjectHeaderIter) Next() (string, string, error) { - if len(iter.data) == 0 { - return "", "", fmt.Errorf("header for %s read past end", iter.name) - } - header := iter.data - keyEnd := strings.IndexByte(header, ' ') - if keyEnd == -1 { - return "", "", fmt.Errorf("malformed header in %s", iter.name) - } - key := header[:keyEnd] - header = header[keyEnd+1:] - valueEnd := strings.IndexByte(header, '\n') - if valueEnd == -1 { - return "", "", fmt.Errorf("malformed header in %s", iter.name) - } - value := header[:valueEnd] - iter.data = header[valueEnd+1:] - return key, value, nil -} - -type Commit struct { - Size counts.Count32 - Parents []OID - Tree OID -} - -func ParseCommit(oid OID, data []byte) (*Commit, error) { - var parents []OID - var tree OID - var treeFound bool - iter, err := NewObjectHeaderIter(oid.String(), data) +// GitPath returns that path of a file within the git repository, by +// calling `git rev-parse --git-path $relPath`. The returned path is +// relative to the current directory. 
+func (repo *Repository) GitPath(relPath string) (string, error) { + cmd := repo.GitCommand("rev-parse", "--git-path", relPath) + out, err := cmd.Output() if err != nil { - return nil, err - } - for iter.HasNext() { - key, value, err := iter.Next() - if err != nil { - return nil, err - } - switch key { - case "parent": - parent, err := NewOID(value) - if err != nil { - return nil, fmt.Errorf("malformed parent header in commit %s", oid) - } - parents = append(parents, parent) - case "tree": - if treeFound { - return nil, fmt.Errorf("multiple trees found in commit %s", oid) - } - tree, err = NewOID(value) - if err != nil { - return nil, fmt.Errorf("malformed tree header in commit %s", oid) - } - treeFound = true - } - } - if !treeFound { - return nil, fmt.Errorf("no tree found in commit %s", oid) - } - return &Commit{ - Size: counts.NewCount32(uint64(len(data))), - Parents: parents, - Tree: tree, - }, nil -} - -type Tree struct { - data string -} - -func ParseTree(oid OID, data []byte) (*Tree, error) { - return &Tree{string(data)}, nil -} - -func (tree Tree) Size() counts.Count32 { - return counts.NewCount32(uint64(len(tree.data))) -} - -// Note that Name shares memory with the tree data that were -// originally read; i.e., retaining a pointer to Name keeps the tree -// data reachable. -type TreeEntry struct { - Name string - OID OID - Filemode uint -} - -type TreeIter struct { - // The as-yet-unread part of the tree's data. - data string -} - -func (tree *Tree) Iter() *TreeIter { - return &TreeIter{ - data: tree.data, + return "", fmt.Errorf( + "running 'git rev-parse --git-path %s': %w", relPath, err, + ) } + // `git rev-parse --git-path` is documented to return the path + // relative to the current directory. 
Since we haven't changed the + // current directory, we can use it as-is: + return string(bytes.TrimSpace(out)), nil } -func (iter *TreeIter) NextEntry() (TreeEntry, bool, error) { - var entry TreeEntry - - if len(iter.data) == 0 { - return TreeEntry{}, false, nil - } - - spAt := strings.IndexByte(iter.data, ' ') - if spAt < 0 { - return TreeEntry{}, false, errors.New("failed to find SP after mode") - } - mode, err := strconv.ParseUint(iter.data[:spAt], 8, 32) - if err != nil { - return TreeEntry{}, false, err - } - entry.Filemode = uint(mode) - - iter.data = iter.data[spAt+1:] - nulAt := strings.IndexByte(iter.data, 0) - if nulAt < 0 { - return TreeEntry{}, false, errors.New("failed to find NUL after filename") - } - - entry.Name = iter.data[:nulAt] - - iter.data = iter.data[nulAt+1:] - if len(iter.data) < 20 { - return TreeEntry{}, false, errors.New("tree entry ends unexpectedly") - } - - copy(entry.OID.v[0:20], iter.data[0:20]) - iter.data = iter.data[20:] - - return entry, true, nil +func (repo *Repository) HashAlgo() HashAlgo { + return repo.hashAlgo } -type Tag struct { - Size counts.Count32 - Referent OID - ReferentType ObjectType +func (repo *Repository) HashSize() int { + return repo.hashAlgo.HashSize() } -func ParseTag(oid OID, data []byte) (*Tag, error) { - var referent OID - var referentFound bool - var referentType ObjectType - var referentTypeFound bool - iter, err := NewObjectHeaderIter(oid.String(), data) - if err != nil { - return nil, err - } - for iter.HasNext() { - key, value, err := iter.Next() - if err != nil { - return nil, err - } - switch key { - case "object": - if referentFound { - return nil, fmt.Errorf("multiple referents found in tag %s", oid) - } - referent, err = NewOID(value) - if err != nil { - return nil, fmt.Errorf("malformed object header in tag %s", oid) - } - referentFound = true - case "type": - if referentTypeFound { - return nil, fmt.Errorf("multiple types found in tag %s", oid) - } - referentType = ObjectType(value) - 
referentTypeFound = true - } - } - if !referentFound { - return nil, fmt.Errorf("no object found in tag %s", oid) - } - if !referentTypeFound { - return nil, fmt.Errorf("no type found in tag %s", oid) - } - return &Tag{ - Size: counts.NewCount32(uint64(len(data))), - Referent: referent, - ReferentType: referentType, - }, nil +func (repo *Repository) NullOID() OID { + return repo.hashAlgo.NullOID() } diff --git a/git/git_bin.go b/git/git_bin.go new file mode 100644 index 0000000..526e9bb --- /dev/null +++ b/git/git_bin.go @@ -0,0 +1,42 @@ +package git + +import ( + "path/filepath" + "sync" + + "github.com/cli/safeexec" +) + +// This variable will be used to memoize the result of `findGitBin()`, +// since its return value only depends on the environment. +var gitBinMemo struct { + once sync.Once + + gitBin string + err error +} + +// findGitBin finds the `git` binary in PATH that should be used by +// the rest of `git-sizer`. It uses `safeexec` to find the executable, +// because on Windows, `exec.Cmd` looks not only in PATH, but also in +// the current directory. This is a potential risk if the repository +// being scanned is hostile and non-bare because it might possibly +// contain an executable file named `git`. +func findGitBin() (string, error) { + gitBinMemo.once.Do(func() { + p, err := safeexec.LookPath("git") + if err != nil { + gitBinMemo.err = err + return + } + + p, err = filepath.Abs(p) + if err != nil { + gitBinMemo.err = err + return + } + + gitBinMemo.gitBin = p + }) + return gitBinMemo.gitBin, gitBinMemo.err +} diff --git a/git/gitconfig.go b/git/gitconfig.go new file mode 100644 index 0000000..76b8422 --- /dev/null +++ b/git/gitconfig.go @@ -0,0 +1,188 @@ +package git + +import ( + "bytes" + "errors" + "fmt" + "os/exec" + "strconv" + "strings" +) + +// ConfigEntry represents an entry in the gitconfig. +type ConfigEntry struct { + // Key is the entry's key, with any common `prefix` removed (see + // `Config()`). 
+	Key string
+
+	// Value is the entry's value, as a string.
+	Value string
+}
+
+// Config represents the gitconfig, or part of the gitconfig, read by
+// `ReadConfig()`.
+type Config struct {
+	// Prefix is the key prefix that was read to fill this `Config`.
+	Prefix string
+
+	// Entries contains the configuration entries that matched
+	// `Prefix`, in the order that they are reported by `git config
+	// --list`.
+	Entries []ConfigEntry
+}
+
+// GetConfig returns the entries from gitconfig. If `prefix` is
+// provided, then only include entries in that section, which must
+// match at a component boundary (as defined by
+// `configKeyMatchesPrefix()`), and strip off the prefix in the keys
+// that are returned.
+func (repo *Repository) GetConfig(prefix string) (*Config, error) {
+	cmd := repo.GitCommand("config", "--list", "-z")
+
+	out, err := cmd.Output()
+	if err != nil {
+		return nil, fmt.Errorf("reading git configuration: %w", err)
+	}
+
+	config := Config{
+		Prefix: prefix,
+	}
+
+	for len(out) > 0 {
+		keyEnd := bytes.IndexByte(out, '\n')
+		if keyEnd == -1 {
+			return nil, errors.New("invalid output from 'git config'")
+		}
+		key := string(out[:keyEnd])
+		out = out[keyEnd+1:]
+		valueEnd := bytes.IndexByte(out, 0)
+		if valueEnd == -1 {
+			return nil, errors.New("invalid output from 'git config'")
+		}
+		value := string(out[:valueEnd])
+		out = out[valueEnd+1:]
+
+		ok, rest := configKeyMatchesPrefix(key, prefix)
+		if !ok {
+			continue
+		}
+
+		entry := ConfigEntry{
+			Key:   rest,
+			Value: value,
+		}
+		config.Entries = append(config.Entries, entry)
+	}
+
+	return &config, nil
+}
+
+// FullKey returns the full gitconfig key name for the relative key
+// name `key`.
+func (config *Config) FullKey(key string) string {
+	if config.Prefix == "" {
+		return key
+	}
+	return fmt.Sprintf("%s.%s", config.Prefix, key)
+}
+
+// configKeyMatchesPrefix checks whether `key` starts with `prefix` at
+// a component boundary (i.e., at a '.').
If yes, it returns `true` +// and the part of the key after the prefix; e.g.: +// +// configKeyMatchesPrefix("foo.bar", "foo") → true, "bar" +// configKeyMatchesPrefix("foo.bar", "foo.") → true, "bar" +// configKeyMatchesPrefix("foo.bar", "foo.bar") → true, "" +// configKeyMatchesPrefix("foo.bar", "foo.bar.") → false, "" +func configKeyMatchesPrefix(key, prefix string) (bool, string) { + if prefix == "" { + return true, key + } + if !strings.HasPrefix(key, prefix) { + return false, "" + } + + if prefix[len(prefix)-1] == '.' { + return true, key[len(prefix):] + } + if len(key) == len(prefix) { + return true, "" + } + if key[len(prefix)] == '.' { + return true, key[len(prefix)+1:] + } + return false, "" +} + +func (repo *Repository) ConfigStringDefault(key string, defaultValue string) (string, error) { + // Note that `git config --get` didn't get `--default` until Git + // 2.18 (released 2018-06-21). + cmd := repo.GitCommand( + "config", "--get", key, + ) + + out, err := cmd.Output() + if err != nil { + if err, ok := err.(*exec.ExitError); ok && err.ExitCode() == 1 { + // This indicates that the value was not found. + return defaultValue, nil + } + return defaultValue, fmt.Errorf("running 'git config': %w", err) + } + + if len(out) > 0 && out[len(out)-1] == '\n' { + out = out[:len(out)-1] + } + + return string(out), nil +} + +func (repo *Repository) ConfigBoolDefault(key string, defaultValue bool) (bool, error) { + // Note that `git config --get` didn't get `--type=bool` or + // `--default` until Git 2.18 (released 2018-06-21). + cmd := repo.GitCommand( + "config", "--get", "--bool", key, + ) + + out, err := cmd.Output() + if err != nil { + if err, ok := err.(*exec.ExitError); ok && err.ExitCode() == 1 { + // This indicates that the value was not found. 
+ return defaultValue, nil + } + return defaultValue, fmt.Errorf("running 'git config': %w", err) + } + + s := string(bytes.TrimSpace(out)) + value, err := strconv.ParseBool(s) + if err != nil { + return defaultValue, fmt.Errorf("unexpected bool value from 'git config': %q", s) + } + + return value, nil +} + +func (repo *Repository) ConfigIntDefault(key string, defaultValue int) (int, error) { + // Note that `git config --get` didn't get `--type=int` or + // `--default` until Git 2.18 (released 2018-06-21). + cmd := repo.GitCommand( + "config", "--get", "--int", key, + ) + + out, err := cmd.Output() + if err != nil { + if err, ok := err.(*exec.ExitError); ok && err.ExitCode() == 1 { + // This indicates that the value was not found. + return defaultValue, nil + } + return defaultValue, fmt.Errorf("running 'git config': %w", err) + } + + s := string(bytes.TrimSpace(out)) + value, err := strconv.Atoi(s) + if err != nil { + return defaultValue, fmt.Errorf("unexpected int value from 'git config': %q", s) + } + + return value, nil +} diff --git a/git/gitconfig_test.go b/git/gitconfig_test.go new file mode 100644 index 0000000..a98c4df --- /dev/null +++ b/git/gitconfig_test.go @@ -0,0 +1,36 @@ +package git + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestConfigKeyMatchesPrefix(t *testing.T) { + for _, p := range []struct { + key, prefix string + expectedBool bool + expectedString string + }{ + {"foo.bar", "", true, "foo.bar"}, + {"foo.bar", "foo", true, "bar"}, + {"foo.bar", "foo.", true, "bar"}, + {"foo.bar", "foo.bar", true, ""}, + {"foo.bar", "foo.bar.", false, ""}, + {"foo.bar", "foo.bar.baz", false, ""}, + {"foo.bar", "foo.barbaz", false, ""}, + {"foo.bar.baz", "foo.bar", true, "baz"}, + {"foo.barbaz", "foo.bar", false, ""}, + {"foo.bar", "bar", false, ""}, + } { + t.Run( + fmt.Sprintf("TestConfigKeyMatchesPrefix(%q, %q)", p.key, p.prefix), + func(t *testing.T) { + ok, s := configKeyMatchesPrefix(p.key, p.prefix) + 
assert.Equal(t, p.expectedBool, ok) + assert.Equal(t, p.expectedString, s) + }, + ) + } +} diff --git a/git/obj_head_iter.go b/git/obj_head_iter.go new file mode 100644 index 0000000..1454d1c --- /dev/null +++ b/git/obj_head_iter.go @@ -0,0 +1,63 @@ +package git + +import ( + "bytes" + "fmt" + "strings" +) + +// ObjectHeaderIter iterates over the headers within a commit or tag +// object. +type ObjectHeaderIter struct { + name string + data string +} + +// NewObjectHeaderIter returns an `ObjectHeaderIter` that iterates +// over the headers in a commit or tag object. `data` should be the +// object's contents, which is usually terminated by a blank line that +// separates the header from the comment. However, annotated tags +// don't always include comments, and Git even tolerates commits +// without comments, so don't insist on a blank line. `name` is used +// in error messages. +func NewObjectHeaderIter(name string, data []byte) (ObjectHeaderIter, error) { + headerEnd := bytes.Index(data, []byte("\n\n")) + if headerEnd == -1 { + if len(data) == 0 { + return ObjectHeaderIter{}, fmt.Errorf("%s has zero length", name) + } + + if data[len(data)-1] != '\n' { + return ObjectHeaderIter{}, fmt.Errorf("%s has no terminating LF", name) + } + + return ObjectHeaderIter{name, string(data)}, nil + } + return ObjectHeaderIter{name, string(data[:headerEnd+1])}, nil +} + +// HasNext returns true iff there are more headers to retrieve. +func (iter *ObjectHeaderIter) HasNext() bool { + return len(iter.data) > 0 +} + +// Next returns the key and value of the next header. 
+func (iter *ObjectHeaderIter) Next() (string, string, error) {
+	if len(iter.data) == 0 {
+		return "", "", fmt.Errorf("header for %s read past end", iter.name)
+	}
+	header := iter.data
+	keyEnd := strings.IndexByte(header, ' ')
+	if keyEnd == -1 {
+		return "", "", fmt.Errorf("malformed header in %s", iter.name)
+	}
+	key := header[:keyEnd]
+	header = header[keyEnd+1:]
+	valueEnd := strings.IndexByte(header, '\n')
+	if valueEnd == -1 {
+		return "", "", fmt.Errorf("malformed header in %s", iter.name)
+	}
+	value := header[:valueEnd]
+	iter.data = header[valueEnd+1:]
+	return key, value, nil
+}
diff --git a/git/obj_iter.go b/git/obj_iter.go
new file mode 100644
index 0000000..c367f11
--- /dev/null
+++ b/git/obj_iter.go
@@ -0,0 +1,148 @@
+package git
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"io"
+
+	"github.com/github/go-pipe/pipe"
+)
+
+// ObjectIter iterates over objects in a Git repository.
+type ObjectIter struct {
+	ctx      context.Context
+	p        *pipe.Pipeline
+	oidCh    chan OID
+	errCh    chan error
+	headerCh chan BatchHeader
+}
+
+// NewObjectIter returns an iterator that iterates over objects in
+// `repo`. The roots of the walk are fed to the iterator via
+// `AddRoot()`. The caller must call `Close()` when done, whether or
+// not any roots were added.
+func (repo *Repository) NewObjectIter(ctx context.Context) (*ObjectIter, error) { + iter := ObjectIter{ + ctx: ctx, + p: pipe.New(), + oidCh: make(chan OID), + errCh: make(chan error), + headerCh: make(chan BatchHeader), + } + hashHexSize := repo.HashSize() * 2 + iter.p.Add( + // Read OIDs from `iter.oidCh` and write them to `git + // rev-list`: + pipe.Function( + "request-objects", + func(ctx context.Context, _ pipe.Env, _ io.Reader, stdout io.Writer) error { + out := bufio.NewWriter(stdout) + + for { + select { + case oid, ok := <-iter.oidCh: + if !ok { + return out.Flush() + } + if _, err := fmt.Fprintln(out, oid.String()); err != nil { + return fmt.Errorf("writing to 'git cat-file': %w", err) + } + case <-ctx.Done(): + return ctx.Err() + } + } + }, + ), + + // Walk starting at the OIDs on `stdin` and output the OIDs + // (possibly followed by paths) of all of the Git objects + // found. + pipe.CommandStage( + "git-rev-list", + repo.GitCommand("rev-list", "--objects", "--stdin", "--date-order"), + ), + + // Read the output of `git rev-list --objects`, strip off any + // trailing information, and write the OIDs to `git cat-file`: + pipe.LinewiseFunction( + "copy-oids", + func(_ context.Context, _ pipe.Env, line []byte, stdout *bufio.Writer) error { + if len(line) < hashHexSize { + return fmt.Errorf("line too short: '%s'", line) + } + if _, err := stdout.Write(line[:hashHexSize]); err != nil { + return fmt.Errorf("writing OID to 'git cat-file': %w", err) + } + if err := stdout.WriteByte('\n'); err != nil { + return fmt.Errorf("writing LF to 'git cat-file': %w", err) + } + return nil + }, + ), + + // Process the OIDs from stdin and, for each object, output a + // header: + pipe.CommandStage( + "git-cat-file", + repo.GitCommand("cat-file", "--batch-check", "--buffer"), + ), + + // Parse the object headers and shove them into `headerCh`: + pipe.Function( + "object-parser", + func(ctx context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error { + defer 
close(iter.headerCh) + + f := bufio.NewReader(stdin) + + for { + header, err := f.ReadString('\n') + if err != nil { + if err == io.EOF { + return nil + } + return fmt.Errorf("reading from 'git cat-file': %w", err) + } + batchHeader, err := ParseBatchHeader("", header) + if err != nil { + return fmt.Errorf("parsing output of 'git cat-file': %w", err) + } + + iter.headerCh <- batchHeader + } + }, + ), + ) + + if err := iter.p.Start(ctx); err != nil { + return nil, err + } + + return &iter, nil +} + +// AddRoot adds another OID to be included in the walk. +func (iter *ObjectIter) AddRoot(oid OID) error { + select { + case iter.oidCh <- oid: + return nil + case <-iter.ctx.Done(): + return iter.ctx.Err() + } +} + +// Close closes the iterator and frees up resources. +func (iter *ObjectIter) Close() { + close(iter.oidCh) +} + +// Next returns either the next object (its OID, type, and size), or a +// `false` boolean value to indicate that there are no data left. +func (iter *ObjectIter) Next() (BatchHeader, bool, error) { + header, ok := <-iter.headerCh + if !ok { + return missingHeader, false, iter.p.Wait() + } + return header, true, nil +} diff --git a/git/obj_resolver.go b/git/obj_resolver.go new file mode 100644 index 0000000..fbeb246 --- /dev/null +++ b/git/obj_resolver.go @@ -0,0 +1,20 @@ +package git + +import ( + "bytes" + "fmt" +) + +func (repo *Repository) ResolveObject(name string) (OID, error) { + cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name) + output, err := cmd.Output() + if err != nil { + return repo.NullOID(), fmt.Errorf("resolving object %q: %w", name, err) + } + oidString := string(bytes.TrimSpace(output)) + oid, err := NewOID(oidString) + if err != nil { + return repo.NullOID(), fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) + } + return oid, nil +} diff --git a/git/oid.go b/git/oid.go new file mode 100644 index 0000000..2a2bdfc --- /dev/null +++ b/git/oid.go @@ -0,0 +1,100 @@ +package git + +import ( 
+ "bytes" + "crypto/sha1" //nolint:gosec + "crypto/sha256" + "encoding/hex" + "errors" +) + +const ( + HashSizeSHA256 = sha256.Size + HashSizeSHA1 = sha1.Size + HashSizeMax = HashSizeSHA256 +) + +type HashAlgo int + +const ( + HashUnknown HashAlgo = iota + HashSHA1 + HashSHA256 +) + +// OID represents the SHA-1 object ID of a Git object, in binary +// format. +type OID struct { + v [HashSizeMax]byte + hashSize int +} + +func (h HashAlgo) NullOID() OID { + switch h { + case HashSHA1: + return OID{hashSize: HashSizeSHA1} + case HashSHA256: + return OID{hashSize: HashSizeSHA256} + } + return OID{} +} + +func (h HashAlgo) HashSize() int { + switch h { + case HashSHA1: + return HashSizeSHA1 + case HashSHA256: + return HashSizeSHA256 + } + return 0 +} + +// defaultNullOID is the null object ID; i.e., all zeros. +var defaultNullOID OID + +func IsNullOID(o OID) bool { + return bytes.Equal(o.v[:], defaultNullOID.v[:]) +} + +// OIDFromBytes converts a byte slice containing an object ID in +// binary format into an `OID`. +func OIDFromBytes(oidBytes []byte) (OID, error) { + var oid OID + oidSize := len(oidBytes) + if oidSize != HashSizeSHA1 && oidSize != HashSizeSHA256 { + return OID{}, errors.New("bytes oid has the wrong length") + } + oid.hashSize = oidSize + copy(oid.v[0:oidSize], oidBytes) + return oid, nil +} + +// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40,64}`) into an `OID`. +func NewOID(s string) (OID, error) { + oidBytes, err := hex.DecodeString(s) + if err != nil { + return OID{}, err + } + return OIDFromBytes(oidBytes) +} + +// String formats `oid` as a string in hex format. +func (oid OID) String() string { + return hex.EncodeToString(oid.v[:oid.hashSize]) +} + +// Bytes returns a byte slice view of `oid`, in binary format. +func (oid OID) Bytes() []byte { + return oid.v[:oid.hashSize] +} + +// MarshalJSON expresses `oid` as a JSON string with its enclosing +// quotation marks. 
+func (oid OID) MarshalJSON() ([]byte, error) { + src := oid.v[:oid.hashSize] + dst := make([]byte, hex.EncodedLen(len(src))+2) + dst[0] = '"' + dst[len(dst)-1] = '"' + hex.Encode(dst[1:len(dst)-1], src) + return dst, nil +} diff --git a/git/ref_filter.go b/git/ref_filter.go new file mode 100644 index 0000000..46aff66 --- /dev/null +++ b/git/ref_filter.go @@ -0,0 +1,142 @@ +package git + +import ( + "regexp" + "strings" +) + +type ReferenceFilter interface { + Filter(refname string) bool +} + +// Combiner combines two `ReferenceFilter`s into one compound one. +// `f1` is allowed to be `nil`. +type Combiner interface { + Combine(f1, f2 ReferenceFilter) ReferenceFilter + Inverted() Combiner +} + +type inverse struct { + f ReferenceFilter +} + +func (f inverse) Filter(refname string) bool { + return !f.f.Filter(refname) +} + +type intersection struct { + f1, f2 ReferenceFilter +} + +func (f intersection) Filter(refname string) bool { + return f.f1.Filter(refname) && f.f2.Filter(refname) +} + +// Include is a Combiner that includes the references matched by `f2`. +// If `f1` is `nil`, it is treated as including nothing. +type include struct{} + +func (_ include) Combine(f1, f2 ReferenceFilter) ReferenceFilter { + if f1 == nil { + return f2 + } + return union{f1, f2} +} + +func (_ include) Inverted() Combiner { + return Exclude +} + +var Include include + +type union struct { + f1, f2 ReferenceFilter +} + +func (f union) Filter(refname string) bool { + return f.f1.Filter(refname) || f.f2.Filter(refname) +} + +// Exclude is a Combiner that excludes the references matched by `f2`. +// If `f1` is `nil`, it is treated as including everything. 
+type exclude struct{}
+
+func (_ exclude) Combine(f1, f2 ReferenceFilter) ReferenceFilter {
+	if f1 == nil {
+		return inverse{f2}
+	}
+	return intersection{f1, inverse{f2}}
+
+}
+
+func (_ exclude) Inverted() Combiner {
+	return include{}
+}
+
+var Exclude exclude
+
+type allReferencesFilter struct{}
+
+func (_ allReferencesFilter) Filter(_ string) bool {
+	return true
+}
+
+var AllReferencesFilter allReferencesFilter
+
+type noReferencesFilter struct{}
+
+func (_ noReferencesFilter) Filter(_ string) bool {
+	return false
+}
+
+var NoReferencesFilter noReferencesFilter
+
+// PrefixFilter returns a `ReferenceFilter` that matches references
+// whose names start with the specified `prefix`, which must match at
+// a component boundary. For example,
+//
+// - Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not
+//   "refs/foobar".
+//
+// - Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or
+//   "refs/foobar".
+func PrefixFilter(prefix string) ReferenceFilter {
+	if prefix == "" {
+		return AllReferencesFilter
+	}
+	return prefixFilter{prefix}
+}
+
+type prefixFilter struct {
+	prefix string
+}
+
+func (f prefixFilter) Filter(refname string) bool {
+	if strings.HasSuffix(f.prefix, "/") {
+		return strings.HasPrefix(refname, f.prefix)
+	}
+
+	return strings.HasPrefix(refname, f.prefix) &&
+		(len(refname) == len(f.prefix) || refname[len(f.prefix)] == '/')
+}
+
+// RegexpFilter returns a `ReferenceFilter` that matches references
+// whose names match the specified `pattern`, which must match the
+// whole reference name.
+func RegexpFilter(pattern string) (ReferenceFilter, error) { + pattern = "^" + pattern + "$" + re, err := regexp.Compile(pattern) + if err != nil { + return nil, err + } + + return regexpFilter{re}, nil +} + +type regexpFilter struct { + re *regexp.Regexp +} + +func (f regexpFilter) Filter(refname string) bool { + return f.re.MatchString(refname) +} diff --git a/git/ref_filter_test.go b/git/ref_filter_test.go new file mode 100644 index 0000000..20ae018 --- /dev/null +++ b/git/ref_filter_test.go @@ -0,0 +1,117 @@ +package git_test + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/github/git-sizer/git" +) + +func TestPrefixFilter(t *testing.T) { + t.Parallel() + + for _, p := range []struct { + prefix string + refname string + expected bool + }{ + {"refs/heads", "refs/heads/master", true}, + {"refs/heads", "refs/tags/master", false}, + {"refs/heads", "refs/he", false}, + {"refs/heads", "refs/headstrong", false}, + {"refs/heads", "refs/tags/refs/heads", false}, + + {"refs/foo", "refs/foo/bar", true}, + {"refs/foo", "refs/foo", true}, + {"refs/foo", "refs/foobar", false}, + + {"refs/foo/", "refs/foo/bar", true}, + {"refs/foo/", "refs/foo", false}, + {"refs/foo/", "refs/foobar", false}, + + {"refs/stash", "refs/stash", true}, + {"refs/remotes", "refs/remotes/origin/master", true}, + } { + t.Run( + fmt.Sprintf("prefix '%s', refname '%s'", p.prefix, p.refname), + func(t *testing.T) { + assert.Equal( + t, + p.expected, + git.PrefixFilter(p.prefix).Filter(p.refname), + ) + }, + ) + } +} + +func regexpFilter(t *testing.T, pattern string) git.ReferenceFilter { + t.Helper() + + f, err := git.RegexpFilter(pattern) + require.NoError(t, err) + return f +} + +func TestRegexpFilter(t *testing.T) { + t.Parallel() + + for _, p := range []struct { + pattern string + refname string + expected bool + }{ + {`refs/heads/master`, "refs/heads/master", true}, + {`refs/heads/.*`, "refs/heads/master", true}, + 
{`.*/heads/.*`, "refs/heads/master", true}, + {`.*/heads/`, "refs/heads/master", false}, + {`.*/heads`, "refs/heads/master", false}, + {`/heads/.*`, "refs/heads/master", false}, + {`heads/.*`, "refs/heads/master", false}, + {`refs/tags/release-\d+\.\d+\.\d+`, "refs/tags/release-1.22.333", true}, + {`refs/tags/release-\d+\.\d+\.\d+`, "refs/tags/release-1.2.3rc1", false}, + } { + t.Run( + fmt.Sprintf("pattern '%s', refname '%s'", p.pattern, p.refname), + func(t *testing.T) { + assert.Equal( + t, + p.expected, + regexpFilter(t, p.pattern).Filter(p.refname), + ) + }, + ) + } +} + +func TestIncludeExcludeFilter(t *testing.T) { + t.Parallel() + + var filter git.ReferenceFilter + filter = git.Include.Combine(filter, git.PrefixFilter("refs/heads")) + filter = git.Exclude.Combine(filter, regexpFilter(t, "refs/heads/.*foo.*")) + filter = git.Include.Combine(filter, git.PrefixFilter("refs/remotes")) + filter = git.Exclude.Combine(filter, git.PrefixFilter("refs/remotes/foo")) + + for _, p := range []struct { + refname string + expected bool + }{ + {"refs/heads/master", true}, + {"refs/heads/buffoon", false}, + {"refs/remotes/origin/master", true}, + {"refs/remotes/foo/master", false}, + {"refs/not-mentioned", false}, + } { + t.Run( + fmt.Sprintf("include-exclude '%s'", p.refname), + func(t *testing.T) { + assert.Equal(t, p.expected, filter.Filter(p.refname)) + }, + ) + } + +} diff --git a/git/ref_iter.go b/git/ref_iter.go new file mode 100644 index 0000000..74e8415 --- /dev/null +++ b/git/ref_iter.go @@ -0,0 +1,90 @@ +package git + +import ( + "bufio" + "context" + "fmt" + "io" + + "github.com/github/go-pipe/pipe" +) + +// ReferenceIter is an iterator that interates over references. +type ReferenceIter struct { + refCh chan Reference + errCh chan error +} + +// NewReferenceIter returns an iterator that iterates over all of the +// references in `repo`. 
+func (repo *Repository) NewReferenceIter(ctx context.Context) (*ReferenceIter, error) { + iter := ReferenceIter{ + refCh: make(chan Reference), + errCh: make(chan error), + } + + p := pipe.New() + p.Add( + // Output all references and their values: + pipe.CommandStage( + "git-for-each-ref", + repo.GitCommand( + "for-each-ref", + "--format=%(objectname) %(objecttype) %(objectsize) %(refname)", + ), + ), + + // Read the references and send them to `iter.refCh`, then close + // the channel. + pipe.Function( + "parse-refs", + func(ctx context.Context, env pipe.Env, stdin io.Reader, stdout io.Writer) error { + defer close(iter.refCh) + + in := bufio.NewReader(stdin) + for { + line, err := in.ReadBytes('\n') + if err != nil { + if err == io.EOF { + return nil + } + return fmt.Errorf("reading 'git for-each-ref' output: %w", err) + } + + ref, err := ParseReference(string(line[:len(line)-1])) + if err != nil { + return fmt.Errorf("parsing 'git for-each-ref' output: %w", err) + } + select { + case iter.refCh <- ref: + case <-ctx.Done(): + return ctx.Err() + } + } + }, + ), + ) + + err := p.Start(ctx) + if err != nil { + return nil, err + } + + go func() { + iter.errCh <- p.Wait() + }() + + return &iter, nil +} + +// Next returns either the next reference or a boolean `false` value +// indicating that the iteration is over. On errors, return an error +// (in this case, the caller must still call `Close()`). +func (iter *ReferenceIter) Next() (Reference, bool, error) { + ref, ok := <-iter.refCh + if !ok { + return Reference{}, false, <-iter.errCh + } + + return ref, true, nil +} diff --git a/git/reference.go b/git/reference.go new file mode 100644 index 0000000..e8a1aaf --- /dev/null +++ b/git/reference.go @@ -0,0 +1,52 @@ +package git + +import ( + "fmt" + "strconv" + "strings" + + "github.com/github/git-sizer/counts" +) + +// Reference represents a Git reference. +type Reference struct { + // Refname is the full reference name of the reference. 
+ Refname string + + // ObjectType is the type of the object referenced. + ObjectType ObjectType + + // ObjectSize is the size of the referred-to object, in bytes. + ObjectSize counts.Count32 + + // OID is the OID of the referred-to object. + OID OID +} + +// ParseReference parses `line` (a non-LF-terminated line) into a +// `Reference`. It is assumed that `line` is formatted like the output +// of +// +// git for-each-ref --format='%(objectname) %(objecttype) %(objectsize) %(refname)' +func ParseReference(line string) (Reference, error) { + words := strings.Split(line, " ") + if len(words) != 4 { + return Reference{}, fmt.Errorf("line improperly formatted: %#v", line) + } + oid, err := NewOID(words[0]) + if err != nil { + return Reference{}, fmt.Errorf("SHA-1 improperly formatted: %#v", words[0]) + } + objectType := ObjectType(words[1]) + objectSize, err := strconv.ParseUint(words[2], 10, 32) + if err != nil { + return Reference{}, fmt.Errorf("object size improperly formatted: %#v", words[2]) + } + refname := words[3] + return Reference{ + Refname: refname, + ObjectType: objectType, + ObjectSize: counts.Count32(objectSize), + OID: oid, + }, nil +} diff --git a/git/tag.go b/git/tag.go new file mode 100644 index 0000000..abc76ba --- /dev/null +++ b/git/tag.go @@ -0,0 +1,61 @@ +package git + +import ( + "fmt" + + "github.com/github/git-sizer/counts" +) + +// Tag represents the information that we need about a Git tag object. +type Tag struct { + Size counts.Count32 + Referent OID + ReferentType ObjectType +} + +// ParseTag parses the Git tag object whose contents are contained in +// `data`. `oid` is used only in error messages. 
+func ParseTag(oid OID, data []byte) (*Tag, error) { + var referent OID + var referentFound bool + var referentType ObjectType + var referentTypeFound bool + iter, err := NewObjectHeaderIter(oid.String(), data) + if err != nil { + return nil, err + } + for iter.HasNext() { + key, value, err := iter.Next() + if err != nil { + return nil, err + } + switch key { + case "object": + if referentFound { + return nil, fmt.Errorf("multiple referents found in tag %s", oid) + } + referent, err = NewOID(value) + if err != nil { + return nil, fmt.Errorf("malformed object header in tag %s", oid) + } + referentFound = true + case "type": + if referentTypeFound { + return nil, fmt.Errorf("multiple types found in tag %s", oid) + } + referentType = ObjectType(value) + referentTypeFound = true + } + } + if !referentFound { + return nil, fmt.Errorf("no object found in tag %s", oid) + } + if !referentTypeFound { + return nil, fmt.Errorf("no type found in tag %s", oid) + } + return &Tag{ + Size: counts.NewCount32(uint64(len(data))), + Referent: referent, + ReferentType: referentType, + }, nil +} diff --git a/git/tree.go b/git/tree.go new file mode 100644 index 0000000..18cb3ee --- /dev/null +++ b/git/tree.go @@ -0,0 +1,88 @@ +package git + +import ( + "errors" + "strconv" + "strings" + + "github.com/github/git-sizer/counts" +) + +// Tree represents a Git tree object. +type Tree struct { + data string + hashSize int +} + +// ParseTree parses the tree object whose contents are contained in +// `data`. `oid` is currently unused. +func ParseTree(oid OID, data []byte) (*Tree, error) { + return &Tree{string(data), oid.hashSize}, nil +} + +// Size returns the size of the tree object. +func (tree Tree) Size() counts.Count32 { + return counts.NewCount32(uint64(len(tree.data))) +} + +// TreeEntry represents an entry in a Git tree object. Note that Name +// shares memory with the tree data that were originally read; i.e., +// retaining a pointer to Name keeps the tree data reachable. 
+type TreeEntry struct { + Name string + OID OID + Filemode uint +} + +// TreeIter is an iterator over the entries in a Git tree object. +type TreeIter struct { + // The as-yet-unread part of the tree's data. + data string + hashSize int +} + +// Iter returns an iterator over the entries in `tree`. +func (tree *Tree) Iter() *TreeIter { + return &TreeIter{ + data: tree.data, + hashSize: tree.hashSize, + } +} + +// NextEntry returns either the next entry in a Git tree, or a `false` +// boolean value if there are no more entries. +func (iter *TreeIter) NextEntry() (TreeEntry, bool, error) { + var entry TreeEntry + + if len(iter.data) == 0 { + return TreeEntry{}, false, nil + } + + spAt := strings.IndexByte(iter.data, ' ') + if spAt < 0 { + return TreeEntry{}, false, errors.New("failed to find SP after mode") + } + mode, err := strconv.ParseUint(iter.data[:spAt], 8, 32) + if err != nil { + return TreeEntry{}, false, err + } + entry.Filemode = uint(mode) + + iter.data = iter.data[spAt+1:] + nulAt := strings.IndexByte(iter.data, 0) + if nulAt < 0 { + return TreeEntry{}, false, errors.New("failed to find NUL after filename") + } + + entry.Name = iter.data[:nulAt] + + iter.data = iter.data[nulAt+1:] + if len(iter.data) < iter.hashSize { + return TreeEntry{}, false, errors.New("tree entry ends unexpectedly") + } + entry.OID.hashSize = iter.hashSize + copy(entry.OID.v[0:iter.hashSize], iter.data[0:iter.hashSize]) + iter.data = iter.data[iter.hashSize:] + + return entry, true, nil +} diff --git a/git_sizer_test.go b/git_sizer_test.go index 721d391..f5c8006 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -1,92 +1,60 @@ package main_test import ( + "bytes" + "context" + "encoding/json" "fmt" "io" - "io/ioutil" "os" "os/exec" "path/filepath" + "runtime" + "strings" "testing" "time" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/github/git-sizer/counts" "github.com/github/git-sizer/git" + 
"github.com/github/git-sizer/internal/testutils" + "github.com/github/git-sizer/meter" "github.com/github/git-sizer/sizes" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) -// Smoke test that the program runs. -func TestExec(t *testing.T) { - cmd := exec.Command("bin/git-sizer") - output, err := cmd.CombinedOutput() - assert.NoErrorf(t, err, "command failed; output: %#v", string(output)) -} - -func gitCommand(t *testing.T, repo *git.Repository, args ...string) *exec.Cmd { - cmd := exec.Command("git", args...) - cmd.Env = append(os.Environ(), "GIT_DIR="+repo.Path()) - return cmd -} +func sizerExe(t *testing.T) string { + t.Helper() -func addFile(t *testing.T, repoPath string, repo *git.Repository, relativePath, contents string) { - dirPath := filepath.Dir(relativePath) - if dirPath != "." { - require.NoError(t, os.MkdirAll(filepath.Join(repoPath, dirPath), 0777), "creating subdir") + var v string + switch runtime.GOOS { + case "windows": + v = `bin\git-sizer.exe` + default: + v = "bin/git-sizer" } - filename := filepath.Join(repoPath, relativePath) - f, err := os.Create(filename) - require.NoErrorf(t, err, "creating file %q", filename) - _, err = f.WriteString(contents) - require.NoErrorf(t, err, "writing to file %q", filename) - require.NoErrorf(t, f.Close(), "closing file %q", filename) + v, err := exec.LookPath(v) + require.NoError(t, err) - cmd := gitCommand(t, repo, "add", relativePath) - cmd.Dir = repoPath - require.NoErrorf(t, cmd.Run(), "adding file %q", relativePath) -} + v, err = filepath.Abs(v) + require.NoError(t, err) -func addAuthorInfo(cmd *exec.Cmd, timestamp *time.Time) { - cmd.Env = append(cmd.Env, - "GIT_AUTHOR_NAME=Arthur", - "GIT_AUTHOR_EMAIL=arthur@example.com", - fmt.Sprintf("GIT_AUTHOR_DATE=%d -0700", timestamp.Unix()), - "GIT_COMMITTER_NAME=Constance", - "GIT_COMMITTER_EMAIL=constance@example.com", - fmt.Sprintf("GIT_COMMITTER_DATE=%d -0700", timestamp.Unix()), - ) - *timestamp = timestamp.Add(60 * 
time.Second) + return v } -func newGitBomb( - repoName string, depth, breadth int, body string, -) (repo *git.Repository, err error) { - path, err := ioutil.TempDir("", repoName) - if err != nil { - return nil, err - } - - defer func() { - if err != nil { - os.RemoveAll(path) - } - }() - - cmd := exec.Command("git", "init", "--bare", path) - err = cmd.Run() - if err != nil { - return nil, err - } +// Smoke test that the program runs. +func TestExec(t *testing.T) { + cmd := exec.Command(sizerExe(t)) + output, err := cmd.CombinedOutput() + assert.NoErrorf(t, err, "command failed; output: %#v", string(output)) +} - repo, err = git.NewRepository(path) - if err != nil { - return nil, err - } +func newGitBomb(t *testing.T, repo *testutils.TestRepo, depth, breadth int, body string) { + t.Helper() - oid, err := repo.CreateObject("blob", func(w io.Writer) error { + oid := repo.CreateObject(t, "blob", func(w io.Writer) error { _, err := io.WriteString(w, body) return err }) @@ -97,9 +65,9 @@ func newGitBomb( prefix := "f" for ; depth > 0; depth-- { - oid, err = repo.CreateObject("tree", func(w io.Writer) error { + oid = repo.CreateObject(t, "tree", func(w io.Writer) error { for i := 0; i < breadth; i++ { - _, err = fmt.Fprintf( + _, err := fmt.Fprintf( w, "%s %s%0*d\x00%s", mode, prefix, digits, i, oid.Bytes(), ) @@ -109,36 +77,469 @@ func newGitBomb( } return nil }) - if err != nil { - return nil, err - } mode = "40000" prefix = "d" } - oid, err = repo.CreateObject("commit", func(w io.Writer) error { + oid = repo.CreateObject(t, "commit", func(w io.Writer) error { _, err := fmt.Fprintf( w, "tree %s\n"+ "author Example 1112911993 -0700\n"+ "committer Example 1112911993 -0700\n"+ "\n"+ - "Mwahahaha!\n", + "Test git bomb\n", oid, ) return err }) - if err != nil { - return nil, err + + repo.UpdateRef(t, "refs/heads/master", oid) +} + +// TestRefSelections tests various combinations of reference selection +// options. 
+func TestRefSelections(t *testing.T) { + t.Parallel() + + references := []struct { + // The plusses and spaces in the `results` string correspond + // to the expected results for one of the tests: `results[i]` + // tells whether we expect `refname` to be included ('+') or + // excluded (' ') in test case number `i`. + results string + + refname string + }{ + //nolint:gocritic // Want columns in comment to match initializers. + // 111111111 + //0123456789012345678 + {"+ + + + + + + + +", "refs/barfoo"}, + {"+ + + + + + +++ ", "refs/foo"}, + {"+ + + + + + + + +", "refs/foobar"}, + {"++ + + + +++ +++", "refs/heads/foo"}, + {"++ + + + ++ +++", "refs/heads/master"}, + {"+ + + ++ + ", "refs/notes/discussion"}, + {"+ + ++ + + ", "refs/remotes/origin/master"}, + {"+ + ++ + + + + +", "refs/remotes/upstream/foo"}, + {"+ + ++ + + ", "refs/remotes/upstream/master"}, + {"+ + + + ++ ", "refs/stash"}, + {"+ ++ + + +++ + +", "refs/tags/foolish"}, + {"+ ++ + + ++ + +", "refs/tags/other"}, + {"+ ++ + + ++ + ", "refs/tags/release-1"}, + {"+ ++ + + ++ + ", "refs/tags/release-2"}, + } + + // computeExpectations assembles and returns the results expected + // for test `i` from the `references` slice. 
+ computeExpectations := func(i int) (string, int) { + var sb strings.Builder + fmt.Fprintln(&sb, "References (included references marked with '+'):") + count := 0 + for _, p := range references { + present := p.results[i] + fmt.Fprintf(&sb, "%c %s\n", present, p.refname) + if present == '+' { + count++ + } + } + return sb.String(), count + } + + // Create a test repo with one orphan commit per refname: + repo := testutils.NewTestRepo(t, true, "ref-selection") + t.Cleanup(func() { repo.Remove(t) }) + + for _, p := range references { + repo.CreateReferencedOrphan(t, p.refname) + } + + executable := sizerExe(t) + + for i, p := range []struct { + name string + args []string + config []git.ConfigEntry + }{ + { // 0 + name: "no arguments", + }, + { // 1 + name: "branches", + args: []string{"--branches"}, + }, + { // 2 + name: "no branches", + args: []string{"--no-branches"}, + }, + { // 3 + name: "tags", + args: []string{"--tags"}, + }, + { // 4 + name: "no tags", + args: []string{"--no-tags"}, + }, + { // 5 + name: "remotes", + args: []string{"--remotes"}, + }, + { // 6 + name: "no remotes", + args: []string{"--no-remotes"}, + }, + { // 7 + name: "notes", + args: []string{"--notes"}, + }, + { // 8 + name: "no notes", + args: []string{"--no-notes"}, + }, + { // 9 + name: "stash", + args: []string{"--stash"}, + }, + { // 10 + name: "no stash", + args: []string{"--no-stash"}, + }, + { // 11 + name: "branches and tags", + args: []string{"--branches", "--tags"}, + }, + { // 12 + name: "foo", + args: []string{"--include", "/.*foo.*/"}, + }, + { // 13 + name: "refs/foo as prefix", + args: []string{"--include", "refs/foo"}, + }, + { // 14 + name: "refs/foo as regexp", + args: []string{"--include", "/refs/foo/"}, + }, + { // 15 + name: "release tags", + args: []string{"--include", "/refs/tags/release-.*/"}, + }, + { // 16 + name: "combination", + args: []string{ + "--include=refs/heads", + "--tags", + "--exclude", "refs/heads/foo", + "--include", "/.*foo.*/", + "--exclude", 
"refs/foo", + "--exclude", "/refs/tags/release-.*/", + }, + }, + { // 17 + name: "branches-refgroup", + args: []string{"--include=@mygroup"}, + config: []git.ConfigEntry{ + {Key: "refgroup.mygroup.include", Value: "refs/heads"}, + }, + }, + { // 18 + name: "combination-refgroup", + args: []string{"--include=@mygroup"}, + config: []git.ConfigEntry{ + {Key: "refgroup.mygroup.include", Value: "refs/heads"}, + {Key: "refgroup.mygroup.include", Value: "refs/tags"}, + {Key: "refgroup.mygroup.exclude", Value: "refs/heads/foo"}, + {Key: "refgroup.mygroup.includeRegexp", Value: ".*foo.*"}, + {Key: "refgroup.mygroup.exclude", Value: "refs/foo"}, + {Key: "refgroup.mygroup.excludeRegexp", Value: "refs/tags/release-.*"}, + }, + }, + } { + i, p := i, p + t.Run( + p.name, + func(t *testing.T) { + t.Parallel() + + repo := repo.Clone(t, "ref-selection") + defer repo.Remove(t) + + for _, e := range p.config { + repo.ConfigAdd(t, e.Key, e.Value) + } + + args := []string{"--show-refs", "--no-progress", "--json", "--json-version=2"} + args = append(args, p.args...) + cmd := exec.Command(executable, args...) 
+ cmd.Env = append( + os.Environ(), + "GIT_DIR="+repo.Path, + ) + var stdout bytes.Buffer + cmd.Stdout = &stdout + var stderr bytes.Buffer + cmd.Stderr = &stderr + err := cmd.Run() + assert.NoError(t, err) + + expectedStderr, expectedUniqueCommitCount := computeExpectations(i) + + // Make sure that the right number of commits was scanned: + var v struct { + UniqueCommitCount struct { + Value int + } + } + err = json.Unmarshal(stdout.Bytes(), &v) + if assert.NoError(t, err) { + assert.EqualValues(t, expectedUniqueCommitCount, v.UniqueCommitCount.Value) + } + + // Make sure that the right references were reported scanned: + assert.Equal(t, expectedStderr, stderr.String()) + }, + ) + } +} + +func TestRefgroups(t *testing.T) { + t.Parallel() + + references := []string{ + "refs/changes/20/884120/1", + "refs/changes/45/12345/42", + "refs/fo", + "refs/foo", + "refs/heads/foo", + "refs/heads/main", + "refs/notes/discussion", + "refs/notes/tests/build", + "refs/notes/tests/default", + "refs/pull/1/head", + "refs/pull/1/merge", + "refs/pull/123/head", + "refs/pull/1234/head", + "refs/remotes/origin/master", + "refs/remotes/upstream/foo", + "refs/remotes/upstream/master", + "refs/stash", + "refs/tags/foolish", + "refs/tags/other", + "refs/tags/release-1", + "refs/tags/release-2", } - err = repo.UpdateRef("refs/heads/master", oid) - if err != nil { - return nil, err + // Create a test repo with one orphan commit per refname: + repo := testutils.NewTestRepo(t, true, "refgroups") + t.Cleanup(func() { repo.Remove(t) }) + + for _, refname := range references { + repo.CreateReferencedOrphan(t, refname) } - return repo, nil + executable := sizerExe(t) + + for _, p := range []struct { + name string + args []string + config []git.ConfigEntry + stdout string + stderr string + }{ + { + name: "no arguments", + stdout: ` +| * References | | | +| * Count | 21 | | +| * Branches | 2 | | +| * Tags | 4 | | +| * Remote-tracking refs | 3 | | +| * Pull request refs | 4 | | +| * Changeset refs | 2 
| | +| * Git notes | 3 | | +| * Git stash | 1 | | +| * Other | 2 | | +| | | | +`[1:], + stderr: ` +References (included references marked with '+'): ++ refs/changes/20/884120/1 ++ refs/changes/45/12345/42 ++ refs/fo ++ refs/foo ++ refs/heads/foo ++ refs/heads/main ++ refs/notes/discussion ++ refs/notes/tests/build ++ refs/notes/tests/default ++ refs/pull/1/head ++ refs/pull/1/merge ++ refs/pull/123/head ++ refs/pull/1234/head ++ refs/remotes/origin/master ++ refs/remotes/upstream/foo ++ refs/remotes/upstream/master ++ refs/stash ++ refs/tags/foolish ++ refs/tags/other ++ refs/tags/release-1 ++ refs/tags/release-2 +`[1:], + }, + { + name: "nested-groups", + config: []git.ConfigEntry{ + // Note that refgroup "misc" is defined implicitly. + + {Key: "refgroup.misc.foo.includeRegexp", Value: ".*foo.*"}, + + {Key: "refgroup.misc.foo.oatend.includeRegexp", Value: ".*o"}, + + {Key: "refgroup.misc.foo.bogus.include", Value: "bogus"}, + + {Key: "refgroup.tags.releases.name", Value: "Releases"}, + {Key: "refgroup.tags.releases.includeRegexp", Value: "refs/tags/release-.*"}, + }, + stdout: ` +| * References | | | +| * Count | 21 | | +| * Branches | 2 | | +| * Tags | 4 | | +| * Releases | 2 | | +| * Other | 2 | | +| * Remote-tracking refs | 3 | | +| * Pull request refs | 4 | | +| * Changeset refs | 2 | | +| * Git notes | 3 | | +| * Git stash | 1 | | +| * misc | 4 | | +| * foo | 4 | | +| * oatend | 3 | | +| * Other | 1 | | +| * Other | 1 | | +| | | | +`[1:], + }, + { + name: "include-refgroups", + args: []string{"--include=@branches", "--include=@tags.releases", "--include=@oatend"}, + config: []git.ConfigEntry{ + {Key: "refgroup.oatend.includeRegexp", Value: ".*o"}, + + {Key: "refgroup.tags.releases.name", Value: "Releases"}, + {Key: "refgroup.tags.releases.includeRegexp", Value: "refs/tags/release-.*"}, + }, + stdout: ` +| * References | | | +| * Count | 21 | | +| * Branches | 2 | | +| * Tags | 2 | | +| * Releases | 2 | | +| * Remote-tracking refs | 1 | | +| * oatend | 4 | | 
+| * Ignored | 14 | | +| | | | +`[1:], + stderr: ` +References (included references marked with '+'): + refs/changes/20/884120/1 + refs/changes/45/12345/42 ++ refs/fo ++ refs/foo ++ refs/heads/foo ++ refs/heads/main + refs/notes/discussion + refs/notes/tests/build + refs/notes/tests/default + refs/pull/1/head + refs/pull/1/merge + refs/pull/123/head + refs/pull/1234/head + refs/remotes/origin/master ++ refs/remotes/upstream/foo + refs/remotes/upstream/master + refs/stash + refs/tags/foolish + refs/tags/other ++ refs/tags/release-1 ++ refs/tags/release-2 +`[1:], + }, + { + name: "exclude-refgroup", + args: []string{"--exclude=@stash", "--exclude=@notes"}, + stdout: ` +| * References | | | +| * Count | 21 | | +| * Branches | 2 | | +| * Tags | 4 | | +| * Remote-tracking refs | 3 | | +| * Pull request refs | 4 | | +| * Changeset refs | 2 | | +| * Other | 2 | | +| * Ignored | 4 | | +| | | | +`[1:], + stderr: ` +References (included references marked with '+'): ++ refs/changes/20/884120/1 ++ refs/changes/45/12345/42 ++ refs/fo ++ refs/foo ++ refs/heads/foo ++ refs/heads/main + refs/notes/discussion + refs/notes/tests/build + refs/notes/tests/default ++ refs/pull/1/head ++ refs/pull/1/merge ++ refs/pull/123/head ++ refs/pull/1234/head ++ refs/remotes/origin/master ++ refs/remotes/upstream/foo ++ refs/remotes/upstream/master + refs/stash ++ refs/tags/foolish ++ refs/tags/other ++ refs/tags/release-1 ++ refs/tags/release-2 +`[1:], + }, + } { + p := p + t.Run( + p.name, + func(t *testing.T) { + t.Parallel() + + repo := repo.Clone(t, "refgroups") + defer repo.Remove(t) + + for _, e := range p.config { + repo.ConfigAdd(t, e.Key, e.Value) + } + + args := append([]string{"--show-refs", "-v", "--no-progress"}, p.args...) + cmd := exec.Command(executable, args...) 
+ cmd.Env = append( + os.Environ(), + "GIT_DIR="+repo.Path, + ) + var stdout bytes.Buffer + cmd.Stdout = &stdout + var stderr bytes.Buffer + cmd.Stderr = &stderr + err := cmd.Run() + assert.NoError(t, err) + + assert.Contains(t, stdout.String(), p.stdout) + if p.stderr != "" { + assert.Equal(t, stderr.String(), p.stderr) + } + }, + ) + } } func pow(x uint64, n int) uint64 { @@ -149,100 +550,177 @@ func pow(x uint64, n int) uint64 { return p } +type refGrouper struct{} + +func (rg refGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) { + return true, nil +} + +func (rg refGrouper) Groups() []sizes.RefGroup { + return nil +} + func TestBomb(t *testing.T) { t.Parallel() - assert := assert.New(t) - repo, err := newGitBomb("bomb", 10, 10, "boom!\n") - if err != nil { - t.Errorf("failed to create bomb: %s", err) - } - defer os.RemoveAll(repo.Path()) + ctx := context.Background() - h, err := sizes.ScanRepositoryUsingGraph( - repo, git.AllReferencesFilter, sizes.NameStyleFull, false, - ) - if !assert.NoError(err) { - return - } + testRepo := testutils.NewTestRepo(t, true, "bomb") + t.Cleanup(func() { testRepo.Remove(t) }) + + newGitBomb(t, testRepo, 10, 10, "boom!\n") + + repo := testRepo.Repository(t) + + t.Run("full", func(t *testing.T) { + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) - assert.Equal(counts.Count32(1), h.UniqueCommitCount, "unique commit count") - assert.Equal(counts.Count64(169), h.UniqueCommitSize, "unique commit size") - assert.Equal(counts.Count32(169), h.MaxCommitSize, "max commit size") - assert.Equal("refs/heads/master", h.MaxCommitSizeCommit.Path(), "max commit size commit") - assert.Equal(counts.Count32(1), h.MaxHistoryDepth, "max history depth") - assert.Equal(counts.Count32(0), h.MaxParentCount, "max parent count") - assert.Equal("refs/heads/master", h.MaxParentCountCommit.Path(), "max parent count commit") - - assert.Equal(counts.Count32(10), h.UniqueTreeCount, "unique tree count") 
- assert.Equal(counts.Count64(2910), h.UniqueTreeSize, "unique tree size") - assert.Equal(counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") - assert.Equal(counts.Count32(10), h.MaxTreeEntries, "max tree entries") - assert.Equal("refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.Path(), "max tree entries tree") - - assert.Equal(counts.Count32(1), h.UniqueBlobCount, "unique blob count") - assert.Equal(counts.Count64(6), h.UniqueBlobSize, "unique blob size") - assert.Equal(counts.Count32(6), h.MaxBlobSize, "max blob size") - assert.Equal("refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.Path(), "max blob size blob") - - assert.Equal(counts.Count32(0), h.UniqueTagCount, "unique tag count") - assert.Equal(counts.Count32(0), h.MaxTagDepth, "max tag depth") - - assert.Equal(counts.Count32(1), h.ReferenceCount, "reference count") - - assert.Equal(counts.Count32(10), h.MaxPathDepth, "max path depth") - assert.Equal("refs/heads/master^{tree}", h.MaxPathDepthTree.Path(), "max path depth tree") - assert.Equal(counts.Count32(29), h.MaxPathLength, "max path length") - assert.Equal("refs/heads/master^{tree}", h.MaxPathLengthTree.Path(), "max path length tree") - - assert.Equal(counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") - assert.Equal("refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.Path(), "max expanded tree count tree") - assert.Equal(counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") - assert.Equal("refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.Path(), "max expanded blob count tree") - assert.Equal(counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") - assert.Equal("refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.Path(), "max expanded blob size tree") - assert.Equal(counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") - assert.Nil(h.MaxExpandedLinkCountTree, "max expanded link count tree") 
- assert.Equal(counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") - assert.Nil(h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") + assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.BestPath(), "max commit size commit") + assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.BestPath(), "max parent count commit") + + assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, 
counts.Count32(1), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) + + t.Run("partial", func(t *testing.T) { + name := "master:d0/d0" + oid, err := repo.ResolveObject(name) + require.NoError(t, err) + roots := []sizes.Root{sizes.NewExplicitRoot(name, oid)} + + h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(0), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(0), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(0), h.MaxCommitSize, "max commit size") + assert.Nil(t, 
h.MaxCommitSizeCommit) + assert.Equal(t, counts.Count32(0), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Nil(t, h.MaxParentCountCommit, "max parent count commit") + + assert.Equal(t, counts.Count32(8), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2330), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(80), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(0), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(8), h.MaxPathDepth, "max path depth") + assert.Equal(t, "master:d0/d0", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(23), h.MaxPathLength, "max path length") + assert.Equal(t, "master:d0/d0", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 8)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(pow(10, 8)), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, 
counts.Count64(6*pow(10, 8)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) } func TestTaggedTags(t *testing.T) { t.Parallel() - path, err := ioutil.TempDir("", "tagged-tags") - require.NoError(t, err, "creating temporary directory") - defer func() { - os.RemoveAll(path) - }() + ctx := context.Background() - cmd := exec.Command("git", "init", path) - require.NoError(t, cmd.Run(), "initializing repo") - repo, err := git.NewRepository(path) - require.NoError(t, err, "initializing Repository object") + testRepo := testutils.NewTestRepo(t, false, "tagged-tags") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - cmd = gitCommand(t, repo, "commit", "-m", "initial", "--allow-empty") - addAuthorInfo(cmd, ×tamp) + cmd := testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating commit") // The lexicographical order of these tags is important, hence // their strange names. 
- cmd = gitCommand(t, repo, "tag", "-m", "tag 1", "tag", "master") - addAuthorInfo(cmd, ×tamp) + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") + testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating tag 1") - cmd = gitCommand(t, repo, "tag", "-m", "tag 2", "bag", "tag") - addAuthorInfo(cmd, ×tamp) + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") + testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating tag 2") - cmd = gitCommand(t, repo, "tag", "-m", "tag 3", "wag", "bag") - addAuthorInfo(cmd, ×tamp) + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") + testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating tag 3") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( - repo, git.AllReferencesFilter, sizes.NameStyleNone, false, + context.Background(), repo, + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -250,30 +728,33 @@ func TestTaggedTags(t *testing.T) { func TestFromSubdir(t *testing.T) { t.Parallel() - path, err := ioutil.TempDir("", "subdir") - require.NoError(t, err, "creating temporary directory") - defer func() { - os.RemoveAll(path) - }() + ctx := context.Background() - cmd := exec.Command("git", "init", path) - require.NoError(t, cmd.Run(), "initializing repo") - repo, err := git.NewRepository(path) - require.NoError(t, err, "initializing Repository object") + testRepo := testutils.NewTestRepo(t, false, "subdir") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - addFile(t, path, repo, "subdir/file.txt", "Hello, world!\n") + testRepo.AddFile(t, "subdir/file.txt", 
"Hello, world!\n") - cmd = gitCommand(t, repo, "commit", "-m", "initial") - addAuthorInfo(cmd, ×tamp) + cmd := testRepo.GitCommand(t, "commit", "-m", "initial") + testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating commit") - repo2, err := git.NewRepository(filepath.Join(path, "subdir")) - require.NoError(t, err, "creating Repository object in subdirectory") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( - repo2, git.AllReferencesFilter, sizes.NameStyleNone, false, + context.Background(), testRepo.Repository(t), + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -281,51 +762,64 @@ func TestFromSubdir(t *testing.T) { func TestSubmodule(t *testing.T) { t.Parallel() - path, err := ioutil.TempDir("", "submodule") + + ctx := context.Background() + + tmp, err := os.MkdirTemp("", "submodule") require.NoError(t, err, "creating temporary directory") defer func() { - os.RemoveAll(path) + os.RemoveAll(tmp) }() timestamp := time.Unix(1112911993, 0) - submPath := filepath.Join(path, "subm") - cmd := exec.Command("git", "init", submPath) - require.NoError(t, cmd.Run(), "initializing subm repo") - submRepo, err := git.NewRepository(submPath) - require.NoError(t, err, "initializing subm Repository object") - addFile(t, submPath, submRepo, "submfile1.txt", "Hello, submodule!\n") - addFile(t, submPath, submRepo, "submfile2.txt", "Hello again, submodule!\n") - addFile(t, submPath, submRepo, "submfile3.txt", "Hello again, submodule!\n") - - cmd = gitCommand(t, submRepo, "commit", "-m", "subm initial") - addAuthorInfo(cmd, ×tamp) + submTestRepo := testutils.TestRepo{ + Path: filepath.Join(tmp, 
"subm"), + } + submTestRepo.Init(t, false) + submTestRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") + submTestRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") + submTestRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") + + cmd := submTestRepo.GitCommand(t, "commit", "-m", "subm initial") + testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating subm commit") - mainPath := filepath.Join(path, "main") - cmd = exec.Command("git", "init", mainPath) - require.NoError(t, cmd.Run(), "initializing main repo") - mainRepo, err := git.NewRepository(mainPath) - require.NoError(t, err, "initializing main Repository object") - addFile(t, mainPath, mainRepo, "mainfile.txt", "Hello, main!\n") + mainTestRepo := testutils.TestRepo{ + Path: filepath.Join(tmp, "main"), + } + mainTestRepo.Init(t, false) + + mainTestRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") - cmd = gitCommand(t, mainRepo, "commit", "-m", "main initial") - addAuthorInfo(cmd, ×tamp) + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "main initial") + testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = gitCommand(t, mainRepo, "submodule", "add", submPath, "sub") - cmd.Dir = mainPath + cmd = mainTestRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submTestRepo.Path, "sub") + cmd.Dir = mainTestRepo.Path require.NoError(t, cmd.Run(), "adding submodule") - cmd = gitCommand(t, mainRepo, "commit", "-m", "add submodule") - addAuthorInfo(cmd, ×tamp) + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "add submodule") + testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "committing submodule to main") + mainRepo := mainTestRepo.Repository(t) + + mainRefRoots, err := sizes.CollectReferences(ctx, mainRepo, refGrouper{}) + require.NoError(t, err) + + mainRoots := make([]sizes.Root, 0, len(mainRefRoots)) + for _, refRoot := range mainRefRoots { + mainRoots 
= append(mainRoots, refRoot) + } + // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - mainRepo, git.AllReferencesFilter, sizes.NameStyleNone, false, + context.Background(), mainTestRepo.Repository(t), + mainRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -333,12 +827,66 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(1), h.MaxExpandedSubmoduleCount, "max expanded submodule count") // Analyze the submodule: - submRepo2, err := git.NewRepository(filepath.Join(mainPath, "sub")) - require.NoError(t, err, "creating Repository object in submodule") + submTestRepo2 := testutils.TestRepo{ + Path: filepath.Join(mainTestRepo.Path, "sub"), + } + + submRepo2 := submTestRepo2.Repository(t) + + submRefRoots2, err := sizes.CollectReferences(ctx, submRepo2, refGrouper{}) + require.NoError(t, err) + + submRoots2 := make([]sizes.Root, 0, len(submRefRoots2)) + for _, refRoot := range submRefRoots2 { + submRoots2 = append(submRoots2, refRoot) + } + h, err = sizes.ScanRepositoryUsingGraph( - submRepo2, git.AllReferencesFilter, sizes.NameStyleNone, false, + context.Background(), submRepo2, + submRoots2, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") assert.Equal(t, counts.Count32(3), h.MaxExpandedBlobCount, "max expanded blob count") } + +func TestSHA256(t *testing.T) { + t.Parallel() + + ctx := context.Background() + + t.Helper() + + path, err := os.MkdirTemp("", "sha256") + require.NoError(t, err) + + testRepo := testutils.TestRepo{Path: path} + defer testRepo.Remove(t) + + // Don't use `GitCommand()` because the directory might not + // exist yet: + cmd := exec.Command("git", "init", "--object-format", "sha256", testRepo.Path) + cmd.Env = testutils.CleanGitEnv() + output, err := 
cmd.CombinedOutput() + + if err != nil && strings.Contains(string(output), "object-format") { + t.Skip("skipping due to lack of SHA256 support") + } + require.NoError(t, err) + + timestamp := time.Unix(1112911993, 0) + + testRepo.AddFile(t, "hello.txt", "Hello, world!\n") + cmd = testRepo.GitCommand(t, "commit", "-m", "initial") + testutils.AddAuthorInfo(cmd, ×tamp) + require.NoError(t, cmd.Run(), "creating initial commit") + + cmd = testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + testutils.AddAuthorInfo(cmd, ×tamp) + require.NoError(t, cmd.Run(), "creating commit") + + repo := testRepo.Repository(t) + + _, err = sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) +} diff --git a/go.mod b/go.mod index 20bf4b8..9db294d 100644 --- a/go.mod +++ b/go.mod @@ -1,10 +1,20 @@ module github.com/github/git-sizer -go 1.13 +go 1.17 require ( + github.com/cli/safeexec v1.0.0 github.com/davecgh/go-spew v1.1.1 // indirect github.com/spf13/pflag v1.0.5 - github.com/stretchr/testify v1.4.0 - gopkg.in/yaml.v2 v2.2.7 // indirect + github.com/stretchr/testify v1.8.1 + golang.org/x/sync v0.1.0 // indirect +) + +require github.com/github/go-pipe v1.0.2 + +require ( + github.com/kr/pretty v0.1.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index dff9970..5c5d0a9 100644 --- a/go.sum +++ b/go.sum @@ -1,17 +1,61 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/cli/safeexec v1.0.0 h1:0VngyaIyqACHdcMNWfo6+KdUYnqEr2Sg+bSP1pdF+dI= +github.com/cli/safeexec v1.0.0/go.mod h1:Z/D4tTN8Vs5gXYHDCbaM1S/anmEDnJb1iW0+EJ5zx3Q= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/github/go-pipe v1.0.2 h1:befTXflsc6ir/h9f6Q7QCDmfojoBswD1MfQrPhmmSoA= +github.com/github/go-pipe v1.0.2/go.mod h1:/GvNLA516QlfGGMtfv4PC/5/CdzL9X4af/AJYhmLD54= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod 
h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk= +go.uber.org/goleak v1.2.0/go.mod h1:XJYK+MuIchqpmGmUSAzotztawfKvYLUIgg7guXrwVUo= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys 
v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.7 h1:VUgggvou5XRW9mHwD/yXxIYSMtY0zoKQf/v226p2nyo= -gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff 
--git a/internal/refopts/filter_group_value.go b/internal/refopts/filter_group_value.go new file mode 100644 index 0000000..4e2d360 --- /dev/null +++ b/internal/refopts/filter_group_value.go @@ -0,0 +1,96 @@ +package refopts + +import ( + "fmt" + + "github.com/github/git-sizer/git" + "github.com/github/git-sizer/sizes" +) + +// filterGroupValue handles `--refgroup=REFGROUP` options, which +// affect the top-level filter. These are a little bit tricky, because +// the references matched by a refgroup depend on its parents (because +// if the parents don't allow the reference, it won't even get tested +// by the refgroup's own filter) and also its children (because if the +// refgroup doesn't have its own filter, then it is defined to be the +// union of its children). Meanwhile, when testing parents, we +// shouldn't test the top-level group, because that's what we are +// trying to affect. +// +// The filtering itself is implemented using a `refGroupFilter`, which +// contains a pointer to a `refGroup` and uses it (including its +// `parent` and `subgroups`) to figure out what should be allowed. +type filterGroupValue struct { + rgb *RefGroupBuilder +} + +func (v *filterGroupValue) Set(symbolString string) error { + symbol := sizes.RefGroupSymbol(symbolString) + + refGroup, ok := v.rgb.groups[symbol] + + if !ok || symbol == "" { + return fmt.Errorf("refgroup '%s' is not defined", symbol) + } + + v.rgb.topLevelGroup.filter = git.Include.Combine( + v.rgb.topLevelGroup.filter, refGroupFilter{refGroup}, + ) + + return nil +} + +func (v *filterGroupValue) Get() interface{} { + return nil +} + +func (v *filterGroupValue) String() string { + return "" +} + +func (v *filterGroupValue) Type() string { + return "name" +} + +// refGroupFilter is a filter based on what would be allowed through +// by a particular refGroup. This is used as part of a top-level +// filter, so it ignores what the top-level filter would say.
+type refGroupFilter struct { + refGroup *refGroup +} + +func (f refGroupFilter) Filter(refname string) bool { + return refGroupPasses(f.refGroup.parent, refname) && + refGroupMatches(f.refGroup, refname) +} + +// refGroupMatches returns true iff `rg` would allow `refname` +// through, not considering its parents. If `rg` doesn't have its own +// filter, this consults its children. +func refGroupMatches(rg *refGroup, refname string) bool { + if rg.filter != nil { + return rg.filter.Filter(refname) + } + + for _, sg := range rg.subgroups { + if refGroupMatches(sg, refname) { + return true + } + } + + return false + } + +// refGroupPasses returns true iff `rg` and the parents of `rg` (not +// including the top-level group) would allow `refname` through. This +// does not consider children of `rg`, which we would still need to +// consult if `rg` doesn't have a filter of its own. +func refGroupPasses(rg *refGroup, refname string) bool { + if rg.Symbol == "" { + return true + } + if !refGroupPasses(rg.parent, refname) { + return false + } + return rg.filter == nil || rg.filter.Filter(refname) +} diff --git a/internal/refopts/filter_value.go b/internal/refopts/filter_value.go new file mode 100644 index 0000000..5dec209 --- /dev/null +++ b/internal/refopts/filter_value.go @@ -0,0 +1,124 @@ +package refopts + +import ( + "errors" + "fmt" + "strconv" + "strings" + + "github.com/github/git-sizer/git" + "github.com/github/git-sizer/sizes" +) + +type filterValue struct { + // rgb is the RefGroupBuilder whose top-level filter is + // affected if this option is used. + rgb *RefGroupBuilder + + // combiner specifies how the filter generated by this option + // is combined with the existing filter; i.e., does it cause + // the matching references to be included or excluded? + combiner git.Combiner + + // pattern, if it is set, is the pattern (prefix or regexp) to + // be matched. If it is not set, then the user must supply the + // pattern.
+ pattern string + + // regexp specifies whether `pattern` should be interpreted as + // a regexp (as opposed to handling it flexibly). + regexp bool +} + +func (v *filterValue) Set(s string) error { + var filter git.ReferenceFilter + combiner := v.combiner + + var pattern string + if v.pattern != "" { + // The pattern is fixed for this option: + pattern = v.pattern + + // It's not really expected, but if the user supplied a + // `false` boolean value, invert the polarity: + b, err := strconv.ParseBool(s) + if err != nil { + return err + } + if !b { + combiner = combiner.Inverted() + } + } else { + // The user must supply the pattern. + pattern = s + } + + if v.regexp { + var err error + filter, err = git.RegexpFilter(pattern) + if err != nil { + return fmt.Errorf("invalid regexp: %q", s) + } + } else { + var err error + filter, err = v.interpretFlexibly(pattern) + if err != nil { + return err + } + } + + v.rgb.topLevelGroup.filter = combiner.Combine(v.rgb.topLevelGroup.filter, filter) + + return nil +} + +// Interpret an option argument flexibly: +// +// * If it is bracketed with `/` characters, treat it as a regexp. +// +// * If it starts with `@`, then consider it a refgroup name. That +// refgroup must already be defined. Use its filter. This construct +// is only allowed at the top level. +// +// * Otherwise treat it as a prefix. 
+func (v *filterValue) interpretFlexibly(s string) (git.ReferenceFilter, error) { + if len(s) >= 2 && strings.HasPrefix(s, "/") && strings.HasSuffix(s, "/") { + pattern := s[1 : len(s)-1] + return git.RegexpFilter(pattern) + } + + if len(s) >= 1 && s[0] == '@' { + name := sizes.RefGroupSymbol(s[1:]) + if name == "" { + return nil, errors.New("missing refgroup name") + } + + refGroup := v.rgb.groups[name] + if refGroup == nil { + return nil, fmt.Errorf("undefined refgroup '%s'", name) + } + + return refGroupFilter{refGroup}, nil + } + + return git.PrefixFilter(s), nil +} + +func (v *filterValue) Get() interface{} { + return nil +} + +func (v *filterValue) String() string { + return "" +} + +func (v *filterValue) Type() string { + switch { + case v.pattern != "": + return "bool" + case v.regexp: + return "regexp" + default: + return "prefix" + } +} diff --git a/internal/refopts/ref_group.go b/internal/refopts/ref_group.go new file mode 100644 index 0000000..b86b333 --- /dev/null +++ b/internal/refopts/ref_group.go @@ -0,0 +1,122 @@ +package refopts + +import ( + "fmt" + + "github.com/github/git-sizer/git" + "github.com/github/git-sizer/sizes" +) + +// refGroup represents one reference group and also its relationship +// to its parent group and any subgroups. Note that reference groups +// don't intrinsically have anything to do with the layout of the +// reference namespace, but they will often be used that way. +type refGroup struct { + sizes.RefGroup + + // filter is the filter for just this reference group. Filters + // for any parent groups must also be applied. + filter git.ReferenceFilter + + parent *refGroup + + // subgroups are the `refGroup` instances representing any + // direct subgroups. + subgroups []*refGroup + + // otherRefGroup, if set, is the refGroup for tallying + // references that match `filter` but don't match any of the + // subgroups.
+ otherRefGroup *sizes.RefGroup +} + +func (rg *refGroup) collectSymbols(refname string) (bool, []sizes.RefGroupSymbol) { + walk := false + var symbols []sizes.RefGroupSymbol + + if rg.filter == nil { + // The tree doesn't have its own filter. Consider it matched + // iff at least one subtree matches it. + + for _, sg := range rg.subgroups { + w, ss := sg.collectSymbols(refname) + if w { + walk = true + } + if len(ss) > 0 && len(symbols) == 0 { + symbols = append(symbols, rg.Symbol) + } + symbols = append(symbols, ss...) + } + } else { + // The tree has its own filter. If it doesn't match the + // reference, then the subtrees don't even get a chance to + // try. + if !rg.filter.Filter(refname) { + return false, nil + } + + walk = true + symbols = append(symbols, rg.Symbol) + + for _, sg := range rg.subgroups { + _, ss := sg.collectSymbols(refname) + symbols = append(symbols, ss...) + } + + // References that match the tree filter but no subtree + // filters are counted as "other": + if rg.otherRefGroup != nil && len(symbols) == 1 { + symbols = append(symbols, rg.otherRefGroup.Symbol) + } + } + + return walk, symbols +} + +// augmentFromConfig augments `rg` based on configuration in the +// gitconfig and returns the result. It is not considered an error if +// there are no usable config entries for the filter. 
+func (rg *refGroup) augmentFromConfig(configger Configger) error { + config, err := configger.GetConfig(fmt.Sprintf("refgroup.%s", rg.Symbol)) + if err != nil { + return err + } + + for _, entry := range config.Entries { + switch entry.Key { + case "name": + rg.Name = entry.Value + case "include": + rg.filter = git.Include.Combine( + rg.filter, git.PrefixFilter(entry.Value), + ) + case "includeregexp": + f, err := git.RegexpFilter(entry.Value) + if err != nil { + return fmt.Errorf( + "invalid regular expression for '%s': %w", + config.FullKey(entry.Key), err, + ) + } + rg.filter = git.Include.Combine(rg.filter, f) + case "exclude": + rg.filter = git.Exclude.Combine( + rg.filter, git.PrefixFilter(entry.Value), + ) + case "excluderegexp": + f, err := git.RegexpFilter(entry.Value) + if err != nil { + return fmt.Errorf( + "invalid regular expression for '%s': %w", + config.FullKey(entry.Key), err, + ) + } + rg.filter = git.Exclude.Combine(rg.filter, f) + default: + // Ignore unrecognized keys. + } + } + + return nil +} diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go new file mode 100644 index 0000000..48f1190 --- /dev/null +++ b/internal/refopts/ref_group_builder.go @@ -0,0 +1,349 @@ +package refopts + +import ( + "fmt" + "strings" + + "github.com/spf13/pflag" + + "github.com/github/git-sizer/git" + "github.com/github/git-sizer/sizes" +) + +// Configger is an abstraction for a thing that can read gitconfig. +type Configger interface { + GetConfig(prefix string) (*git.Config, error) +} + +// RefGroupBuilder handles reference-related options and puts together +// a `sizes.RefGrouper` to be used by the main part of the program. +type RefGroupBuilder struct { + topLevelGroup *refGroup + groups map[sizes.RefGroupSymbol]*refGroup +} + +// NewRefGroupBuilder creates and returns a `RefGroupBuilder` +// instance. 
+func NewRefGroupBuilder(configger Configger) (*RefGroupBuilder, error) { + tlg := refGroup{ + RefGroup: sizes.RefGroup{ + Symbol: "", + Name: "Refs to walk", + }, + } + + rgb := RefGroupBuilder{ + topLevelGroup: &tlg, + groups: map[sizes.RefGroupSymbol]*refGroup{ + "": &tlg, + }, + } + + rgb.initializeStandardRefgroups() + if err := rgb.readRefgroupsFromGitconfig(configger); err != nil { + return nil, err + } + + return &rgb, nil +} + +// getGroup returns the `refGroup` for the symbol with the specified +// name, first creating it (and any missing parents) if needed. +func (rgb *RefGroupBuilder) getGroup(symbol sizes.RefGroupSymbol) *refGroup { + if rg, ok := rgb.groups[symbol]; ok { + return rg + } + + parentSymbol := parentName(symbol) + parent := rgb.getGroup(parentSymbol) + + rg := refGroup{ + RefGroup: sizes.RefGroup{ + Symbol: symbol, + }, + parent: parent, + } + + rgb.groups[symbol] = &rg + parent.subgroups = append(parent.subgroups, &rg) + return &rg +} + +// parentName returns the symbol of the refgroup that is the parent of +// `symbol`, or "" if `symbol` is the top-level group. +func parentName(symbol sizes.RefGroupSymbol) sizes.RefGroupSymbol { + i := strings.LastIndexByte(string(symbol), '.') + if i == -1 { + return "" + } + return symbol[:i] +} + +// initializeStandardRefgroups initializes the built-in refgroups +// ("branches", "tags", etc). 
+func (rgb *RefGroupBuilder) initializeStandardRefgroups() { + initializeGroup := func( + symbol sizes.RefGroupSymbol, name string, filter git.ReferenceFilter, + ) { + rg := rgb.getGroup(symbol) + rg.Name = name + rg.filter = filter + } + + initializeGroup("branches", "Branches", git.PrefixFilter("refs/heads/")) + initializeGroup("tags", "Tags", git.PrefixFilter("refs/tags/")) + initializeGroup("remotes", "Remote-tracking refs", git.PrefixFilter("refs/remotes/")) + initializeGroup("pulls", "Pull request refs", git.PrefixFilter("refs/pull/")) + + filter, err := git.RegexpFilter(`refs/changes/\d{2}/\d+/\d+`) + if err != nil { + panic("internal error") + } + initializeGroup("changes", "Changeset refs", filter) + + initializeGroup("notes", "Git notes", git.PrefixFilter("refs/notes/")) + + filter, err = git.RegexpFilter(`refs/stash`) + if err != nil { + panic("internal error") + } + initializeGroup("stash", "Git stash", filter) +} + +// readRefgroupsFromGitconfig reads any refgroups defined in the +// gitconfig into `rgb`. Any configuration settings for the built-in +// groups are added to the pre-existing definitions of those groups. +func (rgb *RefGroupBuilder) readRefgroupsFromGitconfig(configger Configger) error { + if configger == nil { + // At this point, it is not yet certain that the command was + // run inside a Git repository. If not, ignore this option + // (the command will error out anyway). + return nil + } + + config, err := configger.GetConfig("refgroup") + if err != nil { + return err + } + + seen := make(map[sizes.RefGroupSymbol]bool) + for _, entry := range config.Entries { + symbol, _ := splitKey(entry.Key) + if symbol == "" || seen[symbol] { + // The point of this loop is only to find + // _which_ groups are defined, so we only need + // to visit each one once. 
+ continue + } + + rg := rgb.getGroup(symbol) + if err := rg.augmentFromConfig(configger); err != nil { + return err + } + + seen[symbol] = true + } + + return nil +} + +// splitKey splits `key`, which is part of a gitconfig key, into the +// refgroup symbol to which it applies and the field name within that +// section. +func splitKey(key string) (sizes.RefGroupSymbol, string) { + i := strings.LastIndexByte(key, '.') + if i == -1 { + return "", key + } + return sizes.RefGroupSymbol(key[:i]), key[i+1:] +} + +// AddRefopts adds the reference-related options to `flags`. +func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { + flags.Var( + &filterValue{rgb, git.Include, "", false}, "include", + "include specified references", + ) + + flag := flags.VarPF( + &filterValue{rgb, git.Include, "", true}, "include-regexp", "", + "include references matching the specified regular expression", + ) + flag.Hidden = true + flag.Deprecated = "use --include=/REGEXP/" + + flags.Var( + &filterValue{rgb, git.Exclude, "", false}, "exclude", + "exclude specified references", + ) + + flag = flags.VarPF( + &filterValue{rgb, git.Exclude, "", true}, "exclude-regexp", "", + "exclude references matching the specified regular expression", + ) + flag.Hidden = true + flag.Deprecated = "use --exclude=/REGEXP/" + + flag = flags.VarPF( + &filterValue{rgb, git.Include, "refs/heads", false}, "branches", "", + "process all branches", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{rgb, git.Exclude, "refs/heads", false}, "no-branches", "", + "exclude all branches", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{rgb, git.Include, "refs/tags", false}, "tags", "", + "process all tags", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{rgb, git.Exclude, "refs/tags", false}, "no-tags", "", + "exclude all tags", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{rgb, git.Include, "refs/remotes", false}, "remotes", 
"", + "process all remote-tracking references", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{rgb, git.Exclude, "refs/remotes", false}, "no-remotes", "", + "exclude all remote-tracking references", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{rgb, git.Include, "refs/notes", false}, "notes", "", + "process all git-notes references", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{rgb, git.Exclude, "refs/notes", false}, "no-notes", "", + "exclude all git-notes references", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{rgb, git.Include, "refs/stash", true}, "stash", "", + "process refs/stash", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{rgb, git.Exclude, "refs/stash", true}, "no-stash", "", + "exclude refs/stash", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterGroupValue{rgb}, "refgroup", "", + "process references in refgroup defined by gitconfig", + ) + flag.Hidden = true + flag.Deprecated = "use --include=@REFGROUP" +} + +// Finish collects the information gained from processing the options +// and returns a `sizes.RefGrouper`. +func (rgb *RefGroupBuilder) Finish(defaultAll bool) (sizes.RefGrouper, error) { + if rgb.topLevelGroup.filter == nil { + // User didn't specify any reference options. 
+ if defaultAll { + rgb.topLevelGroup.filter = git.AllReferencesFilter + } else { + rgb.topLevelGroup.filter = git.NoReferencesFilter + } + } + + refGrouper := refGrouper{ + topLevelGroup: rgb.topLevelGroup, + } + + if err := refGrouper.fillInTree(refGrouper.topLevelGroup); err != nil { + return nil, err + } + + if refGrouper.topLevelGroup.filter != nil { + refGrouper.ignoredRefGroup = &sizes.RefGroup{ + Symbol: "ignored", + Name: "Ignored", + } + refGrouper.refGroups = append(refGrouper.refGroups, *refGrouper.ignoredRefGroup) + } + + return &refGrouper, nil +} + +// refGrouper is a `sizes.RefGrouper` based on a hierarchy of nested +// refgroups. +type refGrouper struct { + topLevelGroup *refGroup + refGroups []sizes.RefGroup + + // ignoredRefGroup, if set, is the reference group for + // tallying references that don't match at all. + ignoredRefGroup *sizes.RefGroup +} + +// fillInTree processes the refgroups in the tree rooted at `rg`, +// setting default names where they are missing, verifying that they +// are all defined, adding "Other" groups where needed, and adding the +// refgroups in depth-first-traversal order to `refGrouper.refGroups`. 
+func (refGrouper *refGrouper) fillInTree(rg *refGroup) error {
+	if rg.Name == "" {
+		_, rg.Name = splitKey(string(rg.Symbol))
+	}
+
+	if rg.filter == nil && len(rg.subgroups) == 0 {
+		return fmt.Errorf("refgroup '%s' is not defined", rg.Symbol)
+	}
+
+	refGrouper.refGroups = append(refGrouper.refGroups, rg.RefGroup)
+
+	for _, rg := range rg.subgroups {
+		if err := refGrouper.fillInTree(rg); err != nil {
+			return err
+		}
+	}
+
+	if len(rg.subgroups) != 0 {
+		var otherSymbol sizes.RefGroupSymbol
+		if rg.Symbol == "" {
+			otherSymbol = "other"
+		} else {
+			otherSymbol = sizes.RefGroupSymbol(fmt.Sprintf("%s.other", rg.Symbol))
+		}
+		rg.otherRefGroup = &sizes.RefGroup{
+			Symbol: otherSymbol,
+			Name:   "Other",
+		}
+		refGrouper.refGroups = append(refGrouper.refGroups, *rg.otherRefGroup)
+	}
+
+	return nil
+}
+
+// Categorize decides whether to walk the reference named `refname`
+// and which refgroup(s) it should be counted in.
+func (refGrouper *refGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) {
+	walk, symbols := refGrouper.topLevelGroup.collectSymbols(refname)
+	if !walk && refGrouper.ignoredRefGroup != nil {
+		symbols = append(symbols, refGrouper.ignoredRefGroup.Symbol)
+	}
+	return walk, symbols
+}
+
+// Groups returns a list of all defined refgroups, in the order that
+// they should be output.
+func (refGrouper *refGrouper) Groups() []sizes.RefGroup {
+	return refGrouper.refGroups
+}
diff --git a/internal/refopts/show_ref_grouper.go b/internal/refopts/show_ref_grouper.go
new file mode 100644
index 0000000..da78ca1
--- /dev/null
+++ b/internal/refopts/show_ref_grouper.go
@@ -0,0 +1,34 @@
+package refopts
+
+import (
+	"fmt"
+	"io"
+
+	"github.com/github/git-sizer/sizes"
+)
+
+// showRefGrouper is a `sizes.RefGrouper` that logs its decisions to
+// an `io.Writer`.
+type showRefGrouper struct {
+	sizes.RefGrouper
+	w io.Writer
+}
+
+// NewShowRefGrouper returns a `sizes.RefGrouper` that wraps `rg` and
+// behaves like it except that it also logs its decisions to `w`.
+func NewShowRefGrouper(rg sizes.RefGrouper, w io.Writer) sizes.RefGrouper {
+	return showRefGrouper{
+		RefGrouper: rg,
+		w:          w,
+	}
+}
+
+func (rg showRefGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) {
+	walk, symbols := rg.RefGrouper.Categorize(refname)
+	if walk {
+		fmt.Fprintf(rg.w, "+ %s\n", refname)
+	} else {
+		fmt.Fprintf(rg.w, "  %s\n", refname)
+	}
+	return walk, symbols
+}
diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go
new file mode 100644
index 0000000..e14e487
--- /dev/null
+++ b/internal/testutils/repoutils.go
@@ -0,0 +1,296 @@
+package testutils
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/github/git-sizer/git"
+)
+
+// TestRepo represents a git repository used for tests.
+type TestRepo struct {
+	Path string
+	bare bool
+}
+
+// NewTestRepo creates and initializes a test repository in a
+// temporary directory constructed using `pattern`. The caller must
+// delete the repository by calling `repo.Remove()`.
+func NewTestRepo(t *testing.T, bare bool, pattern string) *TestRepo {
+	t.Helper()
+
+	path, err := os.MkdirTemp("", pattern)
+	require.NoError(t, err)
+
+	repo := TestRepo{Path: path}
+
+	repo.Init(t, bare)
+
+	return &TestRepo{
+		Path: path,
+		bare: bare,
+	}
+}
+
+// Init initializes a git repository at `repo.Path`.
+func (repo *TestRepo) Init(t *testing.T, bare bool) {
+	t.Helper()
+
+	// Don't use `GitCommand()` because the directory might not
+	// exist yet:
+	var cmd *exec.Cmd
+	if bare {
+		//nolint:gosec // `repo.Path` is a path that we created.
+		cmd = exec.Command("git", "init", "--bare", repo.Path)
+	} else {
+		//nolint:gosec // `repo.Path` is a path that we created.
+		cmd = exec.Command("git", "init", repo.Path)
+	}
+	cmd.Env = CleanGitEnv()
+	err := cmd.Run()
+	require.NoError(t, err)
+}
+
+// Remove deletes the test repository at `repo.Path`.
+func (repo *TestRepo) Remove(t *testing.T) {
+	t.Helper()
+
+	_ = os.RemoveAll(repo.Path)
+}
+
+// Clone creates a clone of `repo` at a temporary path constructed
+// using `pattern`. The caller is responsible for removing it when
+// done by calling `Remove()`.
+func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo {
+	t.Helper()
+
+	path, err := os.MkdirTemp("", pattern)
+	require.NoError(t, err)
+
+	err = repo.GitCommand(
+		t, "clone", "--bare", "--mirror", repo.Path, path,
+	).Run()
+	require.NoError(t, err)
+
+	return &TestRepo{
+		Path: path,
+	}
+}
+
+// Repository returns a `*git.Repository` for `repo`.
+func (repo *TestRepo) Repository(t *testing.T) *git.Repository {
+	t.Helper()
+
+	if repo.bare {
+		r, err := git.NewRepositoryFromGitDir(repo.Path)
+		require.NoError(t, err)
+		return r
+	} else {
+		r, err := git.NewRepositoryFromPath(repo.Path)
+		require.NoError(t, err)
+		return r
+	}
+}
+
+// localEnvVars is a list of the variable names that should be cleared
+// to give Git a clean environment.
+var localEnvVars = func() map[string]bool {
+	m := map[string]bool{
+		"HOME":            true,
+		"XDG_CONFIG_HOME": true,
+	}
+	out, err := exec.Command("git", "rev-parse", "--local-env-vars").Output()
+	if err != nil {
+		return m
+	}
+	for _, k := range strings.Fields(string(out)) {
+		m[k] = true
+	}
+	return m
+}()
+
+// CleanGitEnv returns a clean environment for running `git` commands
+// so that they won't be affected by the local environment.
+func CleanGitEnv() []string { + osEnv := os.Environ() + env := make([]string, 0, len(osEnv)+3) + for _, e := range osEnv { + i := strings.IndexByte(e, '=') + if i == -1 { + // This shouldn't happen, but if it does, + // ignore it. + continue + } + k := e[:i] + if localEnvVars[k] { + continue + } + env = append(env, e) + } + return append( + env, + fmt.Sprintf("HOME=%s", os.DevNull), + fmt.Sprintf("XDG_CONFIG_HOME=%s", os.DevNull), + "GIT_CONFIG_NOSYSTEM=1", + ) +} + +// GitCommand creates an `*exec.Cmd` for running `git` in `repo` with +// the specified arguments. +func (repo *TestRepo) GitCommand(t *testing.T, args ...string) *exec.Cmd { + t.Helper() + + gitArgs := []string{"-C", repo.Path} + gitArgs = append(gitArgs, args...) + + //nolint:gosec // The args all come from the test code. + cmd := exec.Command("git", gitArgs...) + cmd.Env = CleanGitEnv() + return cmd +} + +// UpdateRef updates the reference named `refname` to the value `oid`. +func (repo *TestRepo) UpdateRef(t *testing.T, refname string, oid git.OID) { + t.Helper() + + var cmd *exec.Cmd + + if git.IsNullOID(oid) { + cmd = repo.GitCommand(t, "update-ref", "-d", refname) + } else { + cmd = repo.GitCommand(t, "update-ref", refname, oid.String()) + } + require.NoError(t, cmd.Run()) +} + +// CreateObject creates a new Git object, of the specified type, in +// the repository at `repoPath`. `writer` is a function that generates +// the object contents in `git hash-object` input format. 
+func (repo *TestRepo) CreateObject( + t *testing.T, otype git.ObjectType, writer func(io.Writer) error, +) git.OID { + t.Helper() + + cmd := repo.GitCommand(t, "hash-object", "-w", "-t", string(otype), "--stdin") + in, err := cmd.StdinPipe() + require.NoError(t, err) + + out, err := cmd.StdoutPipe() + require.NoError(t, err) + cmd.Stderr = os.Stderr + + err = cmd.Start() + require.NoError(t, err) + + err = writer(in) + err2 := in.Close() + if !assert.NoError(t, err) { + _ = cmd.Wait() + t.FailNow() + } + if !assert.NoError(t, err2) { + _ = cmd.Wait() + t.FailNow() + } + + output, err := io.ReadAll(out) + err2 = cmd.Wait() + require.NoError(t, err) + require.NoError(t, err2) + + oid, err := git.NewOID(string(bytes.TrimSpace(output))) + require.NoError(t, err) + return oid +} + +// AddFile adds and stages a file in `repo` at path `relativePath` +// with the specified `contents`. This must be run in a non-bare +// repository. +func (repo *TestRepo) AddFile(t *testing.T, relativePath, contents string) { + t.Helper() + + dirPath := filepath.Dir(relativePath) + if dirPath != "." { + require.NoError( + t, + os.MkdirAll(filepath.Join(repo.Path, dirPath), 0o777), + "creating subdir", + ) + } + + filename := filepath.Join(repo.Path, relativePath) + f, err := os.Create(filename) + require.NoErrorf(t, err, "creating file %q", filename) + _, err = f.WriteString(contents) + require.NoErrorf(t, err, "writing to file %q", filename) + require.NoErrorf(t, f.Close(), "closing file %q", filename) + + cmd := repo.GitCommand(t, "add", relativePath) + require.NoErrorf(t, cmd.Run(), "adding file %q", relativePath) +} + +// CreateReferencedOrphan creates a simple new orphan commit and +// points the reference with name `refname` at it. This can be run in +// a bare or non-bare repository. 
+func (repo *TestRepo) CreateReferencedOrphan(t *testing.T, refname string) { + t.Helper() + + oid := repo.CreateObject(t, "blob", func(w io.Writer) error { + _, err := fmt.Fprintf(w, "%s\n", refname) + return err + }) + + oid = repo.CreateObject(t, "tree", func(w io.Writer) error { + _, err := fmt.Fprintf(w, "100644 a.txt\x00%s", oid.Bytes()) + return err + }) + + oid = repo.CreateObject(t, "commit", func(w io.Writer) error { + _, err := fmt.Fprintf( + w, + "tree %s\n"+ + "author Example 1112911993 -0700\n"+ + "committer Example 1112911993 -0700\n"+ + "\n"+ + "Commit for reference %s\n", + oid, refname, + ) + return err + }) + + repo.UpdateRef(t, refname, oid) +} + +// AddAuthorInfo adds environment variables to `cmd.Env` that set the +// Git author and committer to known values and set the timestamp to +// `*timestamp`. Then `*timestamp` is moved forward by a minute, so +// that each commit gets a unique timestamp. +func AddAuthorInfo(cmd *exec.Cmd, timestamp *time.Time) { + cmd.Env = append(cmd.Env, + "GIT_AUTHOR_NAME=Arthur", + "GIT_AUTHOR_EMAIL=arthur@example.com", + fmt.Sprintf("GIT_AUTHOR_DATE=%d -0700", timestamp.Unix()), + "GIT_COMMITTER_NAME=Constance", + "GIT_COMMITTER_EMAIL=constance@example.com", + fmt.Sprintf("GIT_COMMITTER_DATE=%d -0700", timestamp.Unix()), + ) + *timestamp = timestamp.Add(60 * time.Second) +} + +// ConfigAdd adds a key-value pair to the gitconfig in `repo`. +func (repo *TestRepo) ConfigAdd(t *testing.T, key, value string) { + t.Helper() + + err := repo.GitCommand(t, "config", "--add", key, value).Run() + require.NoError(t, err) +} diff --git a/isatty/isatty_disabled.go b/isatty/isatty_disabled.go index c16f1d7..3121d33 100644 --- a/isatty/isatty_disabled.go +++ b/isatty/isatty_disabled.go @@ -1,7 +1,9 @@ +//go:build !isatty // +build !isatty package isatty +// Isatty is a stub implementation of `Isatty()` that always returns `true`. 
func Isatty(fd uintptr) (bool, error) { return true, nil } diff --git a/isatty/isatty_enabled.go b/isatty/isatty_enabled.go index 04f7516..94d7f53 100644 --- a/isatty/isatty_enabled.go +++ b/isatty/isatty_enabled.go @@ -1,3 +1,4 @@ +//go:build isatty // +build isatty package isatty @@ -12,6 +13,7 @@ import ( "syscall" ) +// Isatty tries to determine whether `fd` is a TTY. func Isatty(fd uintptr) (bool, error) { result, err := C.isatty(C.int(fd)) if err != nil && err != syscall.EINVAL { diff --git a/meter/meter.go b/meter/meter.go index 118e6d5..ea210fc 100644 --- a/meter/meter.go +++ b/meter/meter.go @@ -2,7 +2,7 @@ package meter import ( "fmt" - "os" + "io" "sync" "sync/atomic" "time" @@ -24,12 +24,16 @@ type Progress interface { Done() } +// Spinners is a slice of short strings that are repeatedly output in +// order to show the user that we are working, before we have any +// actual information to show. var Spinners = []string{"|", "(", "<", "-", "<", "(", "|", ")", ">", "-", ">", ")"} // progressMeter is a `Progress` that reports the current state every -// `period`. +// `period` to an `io.Writer`. type progressMeter struct { lock sync.Mutex + w io.Writer format string period time.Duration lastShownCount int64 @@ -42,8 +46,12 @@ type progressMeter struct { count int64 } -func NewProgressMeter(period time.Duration) Progress { +// NewProgressMeter returns a progress meter that can be used to show +// progress to a TTY periodically, including an increasing int64 +// value. 
+func NewProgressMeter(w io.Writer, period time.Duration) Progress { return &progressMeter{ + w: w, period: period, } } @@ -75,7 +83,7 @@ func (p *progressMeter) Start(format string) { } else { s = "" } - fmt.Fprintf(os.Stderr, p.format, c, s, "\r") + fmt.Fprintf(p.w, p.format, c, s, "\r") p.lock.Unlock() } }() @@ -94,14 +102,16 @@ func (p *progressMeter) Done() { defer p.lock.Unlock() p.ticker = nil c := atomic.LoadInt64(&p.count) - fmt.Fprintf(os.Stderr, p.format, c, " ", "\n") + fmt.Fprintf(p.w, p.format, c, " ", "\n") } // NoProgressMeter is a `Progress` that doesn't actually report // anything. -type NoProgressMeter struct{} +var NoProgressMeter noProgressMeter -func (p *NoProgressMeter) Start(string) {} -func (p *NoProgressMeter) Inc() {} -func (p *NoProgressMeter) Add(int64) {} -func (p *NoProgressMeter) Done() {} +type noProgressMeter struct{} + +func (p noProgressMeter) Start(string) {} +func (p noProgressMeter) Inc() {} +func (p noProgressMeter) Add(int64) {} +func (p noProgressMeter) Done() {} diff --git a/negated_bool_value.go b/negated_bool_value.go new file mode 100644 index 0000000..9dd19fe --- /dev/null +++ b/negated_bool_value.go @@ -0,0 +1,34 @@ +package main + +import ( + "strconv" +) + +// NegatedBoolValue is a `pflag.Value` that set a boolean variable to +// the inverse of what the argument would normally indicate (e.g., to +// implement `--no-foo`-style arguments). 
+type NegatedBoolValue struct { + value *bool +} + +func (v *NegatedBoolValue) Set(s string) error { + b, err := strconv.ParseBool(s) + *v.value = !b + return err +} + +func (v *NegatedBoolValue) Get() interface{} { + return !*v.value +} + +func (v *NegatedBoolValue) String() string { + if v == nil || v.value == nil { + return "true" + } + + return strconv.FormatBool(!*v.value) +} + +func (v *NegatedBoolValue) Type() string { + return "bool" +} diff --git a/script/ensure-go-installed.sh b/script/ensure-go-installed.sh index 43ba12d..1e301fd 100644 --- a/script/ensure-go-installed.sh +++ b/script/ensure-go-installed.sh @@ -4,17 +4,17 @@ if [ -z "$ROOTDIR" ]; then echo 1>&2 'ensure-go-installed.sh invoked without ROOTDIR set!' fi -# Is go installed, and at least 1.13? +# Is go installed, and at least 1.21? go_ok() { set -- $(go version 2>/dev/null | sed -n 's/.*go\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\1 \2/p' | head -n 1) - [ $# -eq 2 ] && [ "$1" -eq 1 ] && [ "$2" -ge 13 ] + [ $# -eq 2 ] && [ "$1" -eq 1 ] && [ "$2" -ge 21 ] } # If a local go is installed, use it. set_up_vendored_go() { - GO_VERSION=go1.13.4 + GO_VERSION=go1.21.3 VENDORED_GOROOT="$ROOTDIR/vendor/$GO_VERSION/go" if [ -x "$VENDORED_GOROOT/bin/go" ]; then export GOROOT="$VENDORED_GOROOT" diff --git a/script/install-vendored-go b/script/install-vendored-go index 34bdf88..76d2195 100755 --- a/script/install-vendored-go +++ b/script/install-vendored-go @@ -1,19 +1,28 @@ #!/bin/sh # The checksums below must correspond to the downloads for this version. 
-GO_VERSION=go1.13.4 +# The checksums can be found on https://go.dev/dl +GO_VERSION=go1.21.3 -if [ $(uname -s) = "Darwin" ]; then - GO_PKG=${GO_VERSION}.darwin-amd64.tar.gz - GO_PKG_SHA=9f0721551a24a1eb43d2005cd58bd7b17574e50384b8da8896b0754259790752 -elif [ $(uname -s) = "Linux" ]; then +case "$(uname -s):$(uname -m)" in +Linux:x86_64) GO_PKG=${GO_VERSION}.linux-amd64.tar.gz - GO_PKG_SHA=692d17071736f74be04a72a06dab9cac1cd759377bd85316e52b2227604c004c -else + GO_PKG_SHA=1241381b2843fae5a9707eec1f8fb2ef94d827990582c7c7c32f5bdfbfd420c8 + ;; +Darwin:x86_64) + GO_PKG=${GO_VERSION}.darwin-amd64.tar.gz + GO_PKG_SHA=27014fc69e301d7588a169ca239b3cc609f0aa1abf38528bf0d20d3b259211eb + ;; +Darwin:arm64) + GO_PKG=${GO_VERSION}.darwin-arm64.tar.gz + GO_PKG_SHA=65302a7a9f7a4834932b3a7a14cb8be51beddda757b567a2f9e0cbd0d7b5a6ab + ;; +*) echo 1>&2 "I don't know how to install Go on your platform." echo 1>&2 "Please install $GO_VERSION or later and add it to your PATH." exit 1 -fi + ;; +esac archivesum() { shasum -a256 "$ARCHIVE" @@ -30,7 +39,7 @@ fi ROOTDIR="$( cd "$( dirname "$0" )/.." 
&& pwd )" VENDORDIR="$ROOTDIR/vendor" -DOWNLOAD_URL=https://storage.googleapis.com/golang/$GO_PKG +DOWNLOAD_URL=https://go.dev/dl/$GO_PKG ARCHIVE="$VENDORDIR/$GO_PKG" INSTALLDIR="$VENDORDIR/$GO_VERSION" export GOROOT="$INSTALLDIR/go" diff --git a/sizes/explicit_root.go b/sizes/explicit_root.go new file mode 100644 index 0000000..09348db --- /dev/null +++ b/sizes/explicit_root.go @@ -0,0 +1,19 @@ +package sizes + +import "github.com/github/git-sizer/git" + +type ExplicitRoot struct { + name string + oid git.OID +} + +func NewExplicitRoot(name string, oid git.OID) ExplicitRoot { + return ExplicitRoot{ + name: name, + oid: oid, + } +} + +func (er ExplicitRoot) Name() string { return er.name } +func (er ExplicitRoot) OID() git.OID { return er.oid } +func (er ExplicitRoot) Walk() bool { return true } diff --git a/sizes/footnotes.go b/sizes/footnotes.go index 4652e4e..3ecf013 100644 --- a/sizes/footnotes.go +++ b/sizes/footnotes.go @@ -5,17 +5,23 @@ import ( "fmt" ) +// Footnotes collects and numbers footnotes for a `table`. type Footnotes struct { footnotes []string indexes map[string]int } +// NewFootnotes creates and returns a new `Footnotes` instance. func NewFootnotes() *Footnotes { return &Footnotes{ indexes: make(map[string]int), } } +// CreateCitation adds a footnote with the specified text and returns +// the string that should be used to refer to it (e.g., "[2]"). If +// there is already a footnote with the exact same text, reuse its +// number. func (f *Footnotes) CreateCitation(footnote string) string { if footnote == "" { return "" @@ -30,6 +36,8 @@ func (f *Footnotes) CreateCitation(footnote string) string { return fmt.Sprintf("[%d]", index) } +// String returns a string representation of the footnote, including a +// trailing LF. 
func (f *Footnotes) String() string { if len(f.footnotes) == 0 { return "" diff --git a/sizes/graph.go b/sizes/graph.go index d358938..2101a00 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -1,85 +1,67 @@ package sizes import ( - "bufio" + "context" "errors" "fmt" - "io" "sync" - "time" "github.com/github/git-sizer/counts" "github.com/github/git-sizer/git" "github.com/github/git-sizer/meter" ) +type Root interface { + Name() string + OID() git.OID + Walk() bool +} + +type ReferenceRoot interface { + Root + Reference() git.Reference + Groups() []RefGroupSymbol +} + +// ScanRepositoryUsingGraph scans `repo`, using `rg` to decide which +// references to scan and how to group them. `nameStyle` specifies +// whether the output should include full names, hashes only, or +// nothing in the footnotes. `progress` tells whether a progress meter +// should be displayed while it works. +// +// It returns the size data for the repository. func ScanRepositoryUsingGraph( - repo *git.Repository, filter git.ReferenceFilter, nameStyle NameStyle, progress bool, + ctx context.Context, + repo *git.Repository, + roots []Root, + nameStyle NameStyle, + progressMeter meter.Progress, ) (HistorySize, error) { graph := NewGraph(nameStyle) - var progressMeter meter.Progress - if progress { - progressMeter = meter.NewProgressMeter(100 * time.Millisecond) - } else { - progressMeter = &meter.NoProgressMeter{} - } - - refIter, err := repo.NewReferenceIter() - if err != nil { - return HistorySize{}, err - } - defer func() { - if refIter != nil { - refIter.Close() - } - }() - iter, in, err := repo.NewObjectIter("--stdin", "--date-order") + objIter, err := repo.NewObjectIter(ctx) if err != nil { return HistorySize{}, err } - defer func() { - if iter != nil { - iter.Close() - } - }() errChan := make(chan error, 1) - var refs []git.Reference - // Feed the references that we want into the stdin of the object - // iterator: + // Feed the references that we want to walk into the stdin of the + // 
object iterator: go func() { - defer in.Close() - bufin := bufio.NewWriter(in) - defer bufin.Flush() - - for { - ref, ok, err := refIter.Next() - if err != nil { - errChan <- err - return - } - if !ok { - break - } - if !filter(ref) { - continue - } - refs = append(refs, ref) - _, err = bufin.WriteString(ref.OID.String()) - if err != nil { - errChan <- err - return - } - err = bufin.WriteByte('\n') - if err != nil { - errChan <- err - return + defer objIter.Close() + + errChan <- func() error { + for _, root := range roots { + if !root.Walk() { + continue + } + + if err := objIter.AddRoot(root.OID()); err != nil { + return err + } } - } - err := refIter.Close() - refIter = nil - errChan <- err + return nil + }() }() type ObjectHeader struct { @@ -138,25 +120,25 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing blobs: %d") for { - oid, objectType, objectSize, err := iter.Next() + obj, ok, err := objIter.Next() if err != nil { - if err != io.EOF { - return HistorySize{}, err - } + return HistorySize{}, err + } + if !ok { break } - switch objectType { + switch obj.ObjectType { case "blob": progressMeter.Inc() - graph.RegisterBlob(oid, objectSize) + graph.RegisterBlob(obj.OID, obj.ObjectSize) case "tree": - trees = append(trees, ObjectHeader{oid, objectSize}) + trees = append(trees, ObjectHeader{obj.OID, obj.ObjectSize}) case "commit": - commits = append(commits, CommitHeader{ObjectHeader{oid, objectSize}, git.NullOID}) + commits = append(commits, CommitHeader{ObjectHeader{obj.OID, obj.ObjectSize}, repo.NullOID()}) case "tag": - tags = append(tags, ObjectHeader{oid, objectSize}) + tags = append(tags, ObjectHeader{obj.OID, obj.ObjectSize}) default: - err = fmt.Errorf("unexpected object type: %s", objectType) + return HistorySize{}, fmt.Errorf("unexpected object type: %s", obj.ObjectType) } } progressMeter.Done() @@ -166,88 +148,56 @@ func ScanRepositoryUsingGraph( return HistorySize{}, err } - err = iter.Close() - iter = nil + objectIter, err := 
repo.NewBatchObjectIter(ctx) if err != nil { return HistorySize{}, err } - objectIter, objectIn, err := repo.NewBatchObjectIter() - if err != nil { - return HistorySize{}, err - } - defer func() { - if objectIter != nil { - objectIter.Close() - } - }() - go func() { - defer objectIn.Close() - bufin := bufio.NewWriter(objectIn) - defer bufin.Flush() - - for _, obj := range trees { - _, err := bufin.WriteString(obj.oid.String()) - if err != nil { - errChan <- err - return - } - err = bufin.WriteByte('\n') - if err != nil { - errChan <- err - return - } - } + defer objectIter.Close() - for i := len(commits); i > 0; i-- { - obj := commits[i-1] - _, err := bufin.WriteString(obj.oid.String()) - if err != nil { - errChan <- err - return - } - err = bufin.WriteByte('\n') - if err != nil { - errChan <- err - return + errChan <- func() error { + for _, obj := range trees { + if err := objectIter.RequestObject(obj.oid); err != nil { + return fmt.Errorf("requesting tree '%s': %w", obj.oid, err) + } } - } - for _, obj := range tags { - _, err := bufin.WriteString(obj.oid.String()) - if err != nil { - errChan <- err - return + for i := len(commits); i > 0; i-- { + obj := commits[i-1] + if err := objectIter.RequestObject(obj.oid); err != nil { + return fmt.Errorf("requesting commit '%s': %w", obj.oid, err) + } } - err = bufin.WriteByte('\n') - if err != nil { - errChan <- err - return + + for _, obj := range tags { + if err := objectIter.RequestObject(obj.oid); err != nil { + return fmt.Errorf("requesting tag '%s': %w", obj.oid, err) + } } - } - errChan <- nil + return nil + }() }() progressMeter.Start("Processing trees: %d") - for _ = range trees { - oid, objectType, _, data, err := objectIter.Next() + for range trees { + obj, ok, err := objectIter.Next() if err != nil { - if err != io.EOF { - return HistorySize{}, err - } + return HistorySize{}, err + } + if !ok { return HistorySize{}, errors.New("fewer trees read than expected") } - if objectType != "tree" { - return 
HistorySize{}, fmt.Errorf("expected tree; read %#v", objectType) + if obj.ObjectType != "tree" { + return HistorySize{}, fmt.Errorf("expected tree; read %#v", obj.ObjectType) } progressMeter.Inc() - tree, err := git.ParseTree(oid, data) + tree, err := git.ParseTree(obj.OID, obj.Data) if err != nil { return HistorySize{}, err } - err = graph.RegisterTree(oid, tree) + err = graph.RegisterTree(obj.OID, tree) if err != nil { return HistorySize{}, err } @@ -259,26 +209,26 @@ func ScanRepositoryUsingGraph( // time: progressMeter.Start("Processing commits: %d") for i := len(commits); i > 0; i-- { - oid, objectType, _, data, err := objectIter.Next() + obj, ok, err := objectIter.Next() if err != nil { - if err != io.EOF { - return HistorySize{}, err - } + return HistorySize{}, err + } + if !ok { return HistorySize{}, errors.New("fewer commits read than expected") } - if objectType != "commit" { - return HistorySize{}, fmt.Errorf("expected commit; read %#v", objectType) + if obj.ObjectType != "commit" { + return HistorySize{}, fmt.Errorf("expected commit; read %#v", obj.ObjectType) } - commit, err := git.ParseCommit(oid, data) + commit, err := git.ParseCommit(obj.OID, obj.Data) if err != nil { return HistorySize{}, err } - if oid != commits[i-1].oid { + if obj.OID != commits[i-1].oid { panic("commits not read in same order as requested") } commits[i-1].tree = commit.Tree progressMeter.Inc() - graph.RegisterCommit(oid, commit) + graph.RegisterCommit(obj.OID, commit) } progressMeter.Done() @@ -295,22 +245,22 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing annotated tags: %d") for range tags { - oid, objectType, _, data, err := objectIter.Next() + obj, ok, err := objectIter.Next() if err != nil { - if err != io.EOF { - return HistorySize{}, err - } + return HistorySize{}, err + } + if !ok { return HistorySize{}, errors.New("fewer tags read than expected") } - if objectType != "tag" { - return HistorySize{}, fmt.Errorf("expected tag; read %#v", objectType) + if 
obj.ObjectType != "tag" { + return HistorySize{}, fmt.Errorf("expected tag; read %#v", obj.ObjectType) } - tag, err := git.ParseTag(oid, data) + tag, err := git.ParseTag(obj.OID, obj.Data) if err != nil { return HistorySize{}, err } progressMeter.Inc() - graph.RegisterTag(oid, tag) + graph.RegisterTag(obj.OID, tag) } progressMeter.Done() @@ -319,26 +269,24 @@ func ScanRepositoryUsingGraph( return HistorySize{}, err } - err = objectIter.Close() - objectIter = nil - if err != nil { - return HistorySize{}, err - } - progressMeter.Start("Processing references: %d") - for _, ref := range refs { + for _, root := range roots { progressMeter.Inc() - graph.RegisterReference(ref) + if refRoot, ok := root.(ReferenceRoot); ok { + graph.RegisterReference(refRoot.Reference(), refRoot.Groups()) + } + + if root.Walk() { + graph.pathResolver.RecordName(root.Name(), root.OID()) + } } progressMeter.Done() return graph.HistorySize(), nil } -// An object graph that is being built up. +// Graph is an object graph that is being built up. type Graph struct { - repo *git.Repository - blobLock sync.Mutex blobSizes map[git.OID]BlobSize @@ -360,6 +308,7 @@ type Graph struct { pathResolver PathResolver } +// NewGraph creates and returns a new `*Graph` instance. func NewGraph(nameStyle NameStyle) *Graph { return &Graph{ blobSizes: make(map[git.OID]BlobSize), @@ -372,18 +321,30 @@ func NewGraph(nameStyle NameStyle) *Graph { tagRecords: make(map[git.OID]*tagRecord), tagSizes: make(map[git.OID]TagSize), + historySize: HistorySize{ + ReferenceGroups: make(map[RefGroupSymbol]*counts.Count32), + }, + pathResolver: NewPathResolver(nameStyle), } } -func (g *Graph) RegisterReference(ref git.Reference) { +// RegisterReference records the specified reference in `g`. 
+func (g *Graph) RegisterReference(ref git.Reference, groups []RefGroupSymbol) { g.historyLock.Lock() g.historySize.recordReference(g, ref) + for _, group := range groups { + g.historySize.recordReferenceGroup(g, group) + } g.historyLock.Unlock() +} - g.pathResolver.RecordReference(ref) +// Register a name that can be used for the specified OID. +func (g *Graph) RegisterName(name string, oid git.OID) { + g.pathResolver.RecordName(name, oid) } +// HistorySize returns the size data that have been collected. func (g *Graph) HistorySize() HistorySize { g.treeLock.Lock() defer g.treeLock.Unlock() @@ -400,7 +361,8 @@ func (g *Graph) HistorySize() HistorySize { return g.historySize } -// Record that the specified `oid` is a blob with the specified size. +// RegisterBlob records that the specified `oid` is a blob with the +// specified size. func (g *Graph) RegisterBlob(oid git.OID, objectSize counts.Count32) { size := BlobSize{Size: objectSize} // There are no listeners. Since this is a blob, we know all that @@ -556,7 +518,7 @@ func (r *treeRecord) initialize(g *Graph, oid git.OID, tree *git.Tree) error { name := entry.Name switch { - case entry.Filemode&0170000 == 0040000: + case entry.Filemode&0o170000 == 0o40000: // Tree listener := func(size TreeSize) { // This listener is called when the tree pointed to by @@ -580,12 +542,12 @@ func (r *treeRecord) initialize(g *Graph, oid git.OID, tree *git.Tree) error { } r.entryCount.Increment(1) - case entry.Filemode&0170000 == 0160000: + case entry.Filemode&0o170000 == 0o160000: // Commit (i.e., submodule) r.size.addSubmodule(name) r.entryCount.Increment(1) - case entry.Filemode&0170000 == 0120000: + case entry.Filemode&0o170000 == 0o120000: // Symlink g.pathResolver.RecordTreeEntry(oid, name, entry.OID) diff --git a/sizes/grouper.go b/sizes/grouper.go new file mode 100644 index 0000000..fdaa927 --- /dev/null +++ b/sizes/grouper.go @@ -0,0 +1,88 @@ +package sizes + +import ( + "context" + + "github.com/github/git-sizer/git" +) 
+ +// RefGroupSymbol is the string "identifier" that is used to refer to +// a refgroup, for example in the gitconfig. Nesting of refgroups is +// inferred from their names, using "." as separator between +// components. For example, if there are three refgroups with symbols +// "tags", "tags.releases", and "foo.bar", then "tags.releases" is +// considered to be nested within "tags", and "foo.bar" is considered +// to be nested within "foo", the latter being created automatically +// if it was not configured explicitly. +type RefGroupSymbol string + +// RefGroup is a group of references, for example "branches" or +// "tags". Reference groups might overlap. +type RefGroup struct { + // Symbol is the unique string by which this `RefGroup` is + // identified and configured. It consists of dot-separated + // components, which implicitly makes a nested tree-like + // structure. + Symbol RefGroupSymbol + + // Name is the name for this `RefGroup` to be presented + // in user-readable output. + Name string +} + +// RefGrouper describes a type that can collate reference names into +// groups and decide which ones to walk. +type RefGrouper interface { + // Categorize tells whether `refname` should be walked at all, + // and if so, the symbols of the reference groups to which it + // belongs. + Categorize(refname string) (bool, []RefGroupSymbol) + + // Groups returns the list of `RefGroup`s, in the order + // that they should be presented. The return value might + // depend on which references have been seen so far.
+ Groups() []RefGroup +} + +type RefRoot struct { + ref git.Reference + walk bool + groups []RefGroupSymbol +} + +func (rr RefRoot) Name() string { return rr.ref.Refname } +func (rr RefRoot) OID() git.OID { return rr.ref.OID } +func (rr RefRoot) Reference() git.Reference { return rr.ref } +func (rr RefRoot) Walk() bool { return rr.walk } +func (rr RefRoot) Groups() []RefGroupSymbol { return rr.groups } + +func CollectReferences( + ctx context.Context, repo *git.Repository, rg RefGrouper, +) ([]RefRoot, error) { + refIter, err := repo.NewReferenceIter(ctx) + if err != nil { + return nil, err + } + + var refsSeen []RefRoot + for { + ref, ok, err := refIter.Next() + if err != nil { + return nil, err + } + if !ok { + return refsSeen, nil + } + + walk, groups := rg.Categorize(ref.Refname) + + refsSeen = append( + refsSeen, + RefRoot{ + ref: ref, + walk: walk, + groups: groups, + }, + ) + } +} diff --git a/sizes/output.go b/sizes/output.go index 4f1d5fd..037f905 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "strconv" + "strings" "github.com/github/git-sizer/counts" "github.com/github/git-sizer/git" @@ -40,7 +41,7 @@ func (s TagSize) String() string { return fmt.Sprintf("tag_depth=%d", s.TagDepth) } -func (s HistorySize) String() string { +func (s *HistorySize) String() string { return fmt.Sprintf( "unique_commit_count=%d, unique_commit_count = %d, max_commit_size = %d, "+ "max_history_depth=%d, max_parent_count=%d, "+ @@ -140,30 +141,30 @@ func newItem( } } -func (l *item) Emit(t *table) { - levelOfConcern, interesting := l.levelOfConcern(t.threshold) +func (i *item) Emit(t *table) { + levelOfConcern, interesting := i.levelOfConcern(t.threshold) if !interesting { return } - valueString, unitString := l.humaner.Format(l.value, l.unit) + valueString, unitString := i.humaner.Format(i.value, i.unit) t.formatRow( - l.name, t.footnotes.CreateCitation(l.Footnote(t.nameStyle)), + i.name, 
t.footnotes.CreateCitation(i.Footnote(t.nameStyle)), valueString, unitString, levelOfConcern, ) } -func (l *item) Footnote(nameStyle NameStyle) string { - if l.path == nil || l.path.OID == git.NullOID { +func (i *item) Footnote(nameStyle NameStyle) string { + if i.path == nil || git.IsNullOID(i.path.OID) { return "" } switch nameStyle { case NameStyleNone: return "" case NameStyleHash: - return l.path.OID.String() + return i.path.OID.String() case NameStyleFull: - return l.path.String() + return i.path.String() default: panic("unexpected NameStyle") } @@ -172,9 +173,12 @@ func (l *item) Footnote(nameStyle NameStyle) string { // If this item's alert level is at least as high as the threshold, // return the string that should be used as its "level of concern" and // `true`; otherwise, return `"", false`. -func (l *item) levelOfConcern(threshold Threshold) (string, bool) { - value, _ := l.value.ToUint64() - alert := Threshold(float64(value) / l.scale) +func (i *item) levelOfConcern(threshold Threshold) (string, bool) { + value, overflow := i.value.ToUint64() + if overflow { + return "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", true + } + alert := Threshold(float64(value) / i.scale) if alert < threshold { return "", false } @@ -210,7 +214,7 @@ func (i *item) MarshalJSON() ([]byte, error) { LevelOfConcern: float64(value) / i.scale, } - if i.path != nil && i.path.OID != git.NullOID { + if i.path != nil && !git.IsNullOID(i.path.OID) { stat.ObjectName = i.path.OID.String() stat.ObjectDescription = i.path.Path() } @@ -218,30 +222,51 @@ func (i *item) MarshalJSON() ([]byte, error) { return json.Marshal(stat) } +// Indented returns an `item` that is just like `i`, but indented by +// `depth` more levels. 
+func (i *item) Indented(depth int) tableContents { + return &indentedItem{ + tableContents: i, + depth: depth, + } +} + +type indentedItem struct { + tableContents + depth int +} + +func (i *indentedItem) Emit(t *table) { + subTable := t.indented("", i.depth) + i.tableContents.Emit(subTable) + t.addSection(subTable) +} + type Threshold float64 // Methods to implement pflag.Value: + func (t *Threshold) String() string { if t == nil { return "UNSET" - } else { - switch *t { - case 0: - return "--verbose" - case 1: - return "--threshold=1" - case 30: - return "--critical" - default: - return fmt.Sprintf("--threshold=%g", *t) - } + } + + switch *t { + case 0: + return "--verbose" + case 1: + return "--threshold=1" + case 30: + return "--critical" + default: + return fmt.Sprintf("--threshold=%g", *t) } } func (t *Threshold) Set(s string) error { v, err := strconv.ParseFloat(s, 64) if err != nil { - return fmt.Errorf("error parsing floating-point value %q: %s", s, err) + return fmt.Errorf("error parsing floating-point value %q: %w", s, err) } *t = Threshold(v) return nil @@ -301,20 +326,21 @@ const ( ) // Methods to implement pflag.Value: + func (n *NameStyle) String() string { if n == nil { return "UNSET" - } else { - switch *n { - case NameStyleNone: - return "none" - case NameStyleHash: - return "hash" - case NameStyleFull: - return "full" - default: - panic("Unexpected NameStyle value") - } + } + + switch *n { + case NameStyleNone: + return "none" + case NameStyleHash: + return "hash" + case NameStyleFull: + return "full" + default: + panic("Unexpected NameStyle value") } } @@ -345,8 +371,10 @@ type table struct { buf bytes.Buffer } -func (s HistorySize) TableString(threshold Threshold, nameStyle NameStyle) string { - contents := s.contents() +func (s *HistorySize) TableString( + refGroups []RefGroup, threshold Threshold, nameStyle NameStyle, +) string { + contents := s.contents(refGroups) t := table{ threshold: threshold, nameStyle: nameStyle, @@ -363,16 +391,20 @@ 
func (s HistorySize) TableString(threshold Threshold, nameStyle NameStyle) strin return t.generateHeader() + t.buf.String() + t.footnotes.String() } -func (t *table) subTable(sectionHeader string) *table { +func (t *table) indented(sectionHeader string, depth int) *table { return &table{ threshold: t.threshold, nameStyle: t.nameStyle, sectionHeader: sectionHeader, footnotes: t.footnotes, - indent: t.indent + 1, + indent: t.indent + depth, } } +func (t *table) subTable(sectionHeader string) *table { + return t.indented(sectionHeader, 1) +} + func (t *table) addSection(subTable *table) { if subTable.buf.Len() > 0 { if t.buf.Len() == 0 { @@ -422,19 +454,41 @@ func (t *table) formatRow( ) } -func (s HistorySize) JSON(threshold Threshold, nameStyle NameStyle) ([]byte, error) { - contents := s.contents() +func (s *HistorySize) JSON( + refGroups []RefGroup, threshold Threshold, nameStyle NameStyle, +) ([]byte, error) { + contents := s.contents(refGroups) items := make(map[string]*item) contents.CollectItems(items) j, err := json.MarshalIndent(items, "", " ") return j, err } -func (s HistorySize) contents() tableContents { +func (s *HistorySize) contents(refGroups []RefGroup) tableContents { S := newSection I := newItem metric := counts.Metric binary := counts.Binary + + //nolint:prealloc // The length is not known in advance. 
+ var rgis []tableContents + for _, rg := range refGroups { + if rg.Symbol == "" { + continue + } + count, ok := s.ReferenceGroups[rg.Symbol] + if !ok { + continue + } + rgi := I( + fmt.Sprintf("refgroup.%s", rg.Symbol), rg.Name, + fmt.Sprintf("The number of references in group '%s'", rg.Symbol), + nil, *count, metric, "", 25000, + ) + indent := strings.Count(string(rg.Symbol), ".") + rgis = append(rgis, rgi.Indented(indent)) + } + return S( "", S( @@ -484,6 +538,10 @@ func (s HistorySize) contents() tableContents { I("referenceCount", "Count", "The total number of references", nil, s.ReferenceCount, metric, "", 25e3), + S( + "", + rgis..., + ), ), ), diff --git a/sizes/path_resolver.go b/sizes/path_resolver.go index f0f59d2..275d19a 100644 --- a/sizes/path_resolver.go +++ b/sizes/path_resolver.go @@ -12,15 +12,15 @@ import ( // `rev-parse` input, including commit and/or file path) by which // specified objects are reachable. It is used as follows: // -// * Request an object's path using `RequestPath()`. The returned -// `Path` object is a placeholder for the object's path. +// - Request an object's path using `RequestPath()`. The returned +// `Path` object is a placeholder for the object's path. // -// * Tell the `PathResolver` about objects that might be along the -// object's reachability path, *in depth-first* order (i.e., -// referents before referers) by calling `RecordTree()`, -// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. +// - Tell the `PathResolver` about objects that might be along the +// object's reachability path, *in depth-first* order (i.e., +// referents before referers) by calling `RecordTree()`, +// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. // -// * Read the path out of the `Path` object using `Path.Path()`. +// - Read the path out of the `Path` object using `Path.Path()`. // // Multiple objects can be processed at once. 
// @@ -34,7 +34,7 @@ import ( type PathResolver interface { RequestPath(oid git.OID, objectType string) *Path ForgetPath(p *Path) - RecordReference(ref git.Reference) + RecordName(name string, oid git.OID) RecordTreeEntry(oid git.OID, name string, childOID git.OID) RecordCommit(oid, tree git.OID) RecordTag(oid git.OID, tag *git.Tag) @@ -60,7 +60,7 @@ func (n NullPathResolver) RequestPath(oid git.OID, objectType string) *Path { func (_ NullPathResolver) ForgetPath(p *Path) {} -func (_ NullPathResolver) RecordReference(ref git.Reference) {} +func (_ NullPathResolver) RecordName(name string, oid git.OID) {} func (_ NullPathResolver) RecordTreeEntry(oid git.OID, name string, childOID git.OID) {} @@ -77,19 +77,19 @@ type InOrderPathResolver struct { // (e.g., the biggest blob, or a tree containing the biggest blob, or // a commit whose tree contains the biggest blob). Valid states: // -// * `parent == nil && relativePath == ""`—we have not yet found -// anything that refers to this object. +// - `parent == nil && relativePath == ""`—we have not yet found +// anything that refers to this object. // -// * `parent != nil && relativePath == ""`—this object is a tree, and -// we have found a commit that refers to it. +// - `parent != nil && relativePath == ""`—this object is a tree, and +// we have found a commit that refers to it. // -// * `parent == nil && relativePath != ""`—we have found a reference -// that points directly at this object; `relativePath` is the full -// name of the reference. +// - `parent == nil && relativePath != ""`—we have found a reference +// that points directly at this object; `relativePath` is the full +// name of the reference. // -// * `parent != nil && relativePath != ""`—this object is a blob or -// tree, and we have found another tree that refers to it; -// `relativePath` is the corresponding tree entry name. 
+// - `parent != nil && relativePath != ""`—this object is a blob or +// tree, and we have found another tree that refers to it; +// `relativePath` is the corresponding tree entry name. type Path struct { // The OID of the object whose path we seek. This member is always // set. @@ -122,7 +122,8 @@ type Path struct { func (p *Path) TreePrefix() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return p.parent.TreePrefix() @@ -130,16 +131,19 @@ func (p *Path) TreePrefix() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath + "/" } - } else { + case p.relativePath != "": + return p.relativePath + "/" + default: return "???" } case "commit", "tag": - if p.parent != nil { + switch { + case p.parent != nil: // The parent is a tag. return fmt.Sprintf("%s^{%s}", p.parent.BestPath(), p.objectType) - } else if p.relativePath != "" { + case p.relativePath != "": return p.relativePath + ":" - } else { + default: return p.OID.String() + ":" } default: @@ -152,7 +156,8 @@ func (p *Path) TreePrefix() string { func (p *Path) Path() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return fmt.Sprintf("%s^{%s}", p.parent.BestPath(), p.objectType) @@ -160,16 +165,19 @@ func (p *Path) Path() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath } - } else { + case p.relativePath != "": + return p.relativePath + default: return "" } case "commit", "tag": - if p.parent != nil { + switch { + case p.parent != nil: // The parent is a tag. 
return fmt.Sprintf("%s^{%s}", p.parent.BestPath(), p.objectType) - } else if p.relativePath != "" { + case p.relativePath != "": return p.relativePath - } else { + default: return "" } default: @@ -254,10 +262,13 @@ func (pr *InOrderPathResolver) forgetPathLocked(p *Path) { panic("forgetPathLocked() called when refcount zero") } p.seekerCount-- + if p.seekerCount > 0 { // The path is still wanted (by another seeker). return - } else if p.parent != nil { + } + + if p.parent != nil { // We already found the object's parent, and the parent's path // is wanted on account if this object. Decrement its // seekerCount. @@ -269,18 +280,18 @@ func (pr *InOrderPathResolver) forgetPathLocked(p *Path) { } } -func (pr *InOrderPathResolver) RecordReference(ref git.Reference) { +func (pr *InOrderPathResolver) RecordName(name string, oid git.OID) { pr.lock.Lock() defer pr.lock.Unlock() - p, ok := pr.soughtPaths[ref.OID] + p, ok := pr.soughtPaths[oid] if !ok { // Nobody is looking for the path to the referent. return } - p.relativePath = ref.Refname - delete(pr.soughtPaths, ref.OID) + p.relativePath = name + delete(pr.soughtPaths, oid) } // Record that the tree with OID `oid` has an entry with the specified diff --git a/sizes/sizes.go b/sizes/sizes.go index eb08e6f..b3de0bc 100644 --- a/sizes/sizes.go +++ b/sizes/sizes.go @@ -160,6 +160,10 @@ type HistorySize struct { // once. ReferenceCount counts.Count32 `json:"reference_count"` + // ReferenceGroups keeps track of how many references in each + // reference group were scanned. + ReferenceGroups map[RefGroupSymbol]*counts.Count32 `json:"reference_groups"` + // The maximum TreeSize in the analyzed history (where each // attribute is maximized separately). 
@@ -288,3 +292,13 @@ func (s *HistorySize) recordTag(g *Graph, oid git.OID, tagSize TagSize, size cou func (s *HistorySize) recordReference(g *Graph, ref git.Reference) { s.ReferenceCount.Increment(1) } + +func (s *HistorySize) recordReferenceGroup(g *Graph, group RefGroupSymbol) { + c, ok := s.ReferenceGroups[group] + if ok { + c.Increment(1) + } else { + n := counts.Count32(1) + s.ReferenceGroups[group] = &n + } +}