blob: 8fdef16742bcef805871edabee843bb543d194ad [file] [log] [blame]
Máximo Cuadrosb543e4e2015-12-12 02:22:521package git
Alberto Cortésd643cea2015-11-16 02:20:012
3import (
4 "bytes"
Alberto Cortés48bf5bd2015-11-27 11:39:345 "errors"
Alberto Cortésd643cea2015-11-16 02:20:016 "fmt"
Alberto Cortésd643cea2015-11-16 02:20:017 "strconv"
8 "strings"
9 "unicode/utf8"
10
Máximo Cuadros6b0a5982016-02-17 11:20:5611 "gopkg.in/src-d/go-git.v3/core"
12 "gopkg.in/src-d/go-git.v3/diff"
Alberto Cortésd643cea2015-11-16 02:20:0113)
14
Máximo Cuadrosb543e4e2015-12-12 02:22:5215type Blame struct {
16 Path string
17 Rev core.Hash
18 Lines []*line
19}
20
21// Blame returns the last commit that modified each line of a file in a
22// repository.
Alberto Cortésd643cea2015-11-16 02:20:0123//
24// The file to blame is identified by the input arguments: repo, commit and path.
25// The output is a slice of commits, one for each line in the file.
26//
27// Blaming a file is a two step process:
28//
29// 1. Create a linear history of the commits affecting a file. We use
Máximo Cuadros1931dfb2016-02-16 16:31:0930// revlist.New for that.
Alberto Cortésd643cea2015-11-16 02:20:0131//
32// 2. Then build a graph with a node for every line in every file in
Máximo Cuadros1931dfb2016-02-16 16:31:0933// the history of the file.
Alberto Cortésd643cea2015-11-16 02:20:0134//
Máximo Cuadros1931dfb2016-02-16 16:31:0935// Each node (line) holds the commit where it was introduced or
36// last modified. To achieve that we use the FORWARD algorithm
37// described in Zimmermann, et al. "Mining Version Archives for
38// Co-changed Lines", in proceedings of the Mining Software
39// Repositories workshop, Shanghai, May 22-23, 2006.
Alberto Cortésd643cea2015-11-16 02:20:0140//
Máximo Cuadros08f9e702016-05-19 11:28:1741// Each node is assigned a commit: Start by the nodes in the first
Máximo Cuadros1931dfb2016-02-16 16:31:0942// commit. Assign that commit as the creator of all its lines.
Alberto Cortésd643cea2015-11-16 02:20:0143//
Máximo Cuadros1931dfb2016-02-16 16:31:0944// Then jump to the nodes in the next commit, and calculate the diff
45// between the two files. Newly created lines get
46// assigned the new commit as its origin. Modified lines also get
47// this new commit. Untouched lines retain the old commit.
Alberto Cortésd643cea2015-11-16 02:20:0148//
Máximo Cuadros1931dfb2016-02-16 16:31:0949// All this work is done in the assignOrigin function which holds all
50// the internal relevant data in a "blame" struct, that is not
51// exported.
Alberto Cortésd643cea2015-11-16 02:20:0152//
Máximo Cuadros1931dfb2016-02-16 16:31:0953// TODO: ways to improve the efficiency of this function:
Alberto Cortésd643cea2015-11-16 02:20:0154//
Máximo Cuadros1931dfb2016-02-16 16:31:0955// 1. Improve revlist
Alberto Cortésd643cea2015-11-16 02:20:0156//
Máximo Cuadros1931dfb2016-02-16 16:31:0957// 2. Improve how to traverse the history (example a backward
58// traversal will be much more efficient)
Alberto Cortésd643cea2015-11-16 02:20:0159//
Máximo Cuadros1931dfb2016-02-16 16:31:0960// TODO: ways to improve the function in general:
Alberto Cortésd643cea2015-11-16 02:20:0161//
Máximo Cuadros1931dfb2016-02-16 16:31:0962// 1. Add memoization between revlist and assign.
Alberto Cortésd643cea2015-11-16 02:20:0163//
Máximo Cuadros1931dfb2016-02-16 16:31:0964// 2. It is using much more memory than needed, see the TODOs below.
Máximo Cuadrosb543e4e2015-12-12 02:22:5265func (c *Commit) Blame(path string) (*Blame, error) {
Alberto Cortésd643cea2015-11-16 02:20:0166 b := new(blame)
Máximo Cuadrosb543e4e2015-12-12 02:22:5267 b.fRev = c
Alberto Cortésd643cea2015-11-16 02:20:0168 b.path = path
69
Alberto Cortés48bf5bd2015-11-27 11:39:3470 // get all the file revisions
71 if err := b.fillRevs(); err != nil {
72 return nil, err
73 }
74
75 // calculate the line tracking graph and fill in
76 // file contents in data.
77 if err := b.fillGraphAndData(); err != nil {
78 return nil, err
79 }
80
81 file, err := b.fRev.File(b.path)
Alberto Cortésd643cea2015-11-16 02:20:0182 if err != nil {
83 return nil, err
84 }
Joshua Sjoding0d999e12016-02-25 06:40:3085 finalLines, err := file.Lines()
86 if err != nil {
87 return nil, err
88 }
Alberto Cortésd643cea2015-11-16 02:20:0189
Alberto Cortés48bf5bd2015-11-27 11:39:3490 lines, err := newLines(finalLines, b.sliceGraph(len(b.graph)-1))
91 if err != nil {
92 return nil, err
93 }
Alberto Cortésd643cea2015-11-16 02:20:0194
Alberto Cortés48bf5bd2015-11-27 11:39:3495 return &Blame{
Alberto Cortés48bf5bd2015-11-27 11:39:3496 Path: path,
Máximo Cuadrosb543e4e2015-12-12 02:22:5297 Rev: c.Hash,
Alberto Cortés48bf5bd2015-11-27 11:39:3498 Lines: lines,
99 }, nil
100}
Alberto Cortésd643cea2015-11-16 02:20:01101
Alberto Cortés48bf5bd2015-11-27 11:39:34102type line struct {
103 author string
104 text string
105}
106
107func newLine(author, text string) *line {
108 return &line{
109 author: author,
110 text: text,
111 }
112}
113
Máximo Cuadrosb543e4e2015-12-12 02:22:52114func newLines(contents []string, commits []*Commit) ([]*line, error) {
Alberto Cortés48bf5bd2015-11-27 11:39:34115 if len(contents) != len(commits) {
Alberto Cortés48bf5bd2015-11-27 11:39:34116 return nil, errors.New("contents and commits have different length")
117 }
118 result := make([]*line, 0, len(contents))
119 for i := range contents {
120 l := newLine(commits[i].Author.Email, contents[i])
121 result = append(result, l)
122 }
123 return result, nil
124}
125
126// this struct is internally used by the blame function to hold its
127// inputs, outputs and state.
128type blame struct {
Máximo Cuadrosb543e4e2015-12-12 02:22:52129 path string // the path of the file to blame
130 fRev *Commit // the commit of the final revision of the file to blame
131 revs []*Commit // the chain of revisions affecting the the file to blame
132 data []string // the contents of the file across all its revisions
133 graph [][]*Commit // the graph of the lines in the file across all the revisions TODO: not all commits are needed, only the current rev and the prev
Alberto Cortés48bf5bd2015-11-27 11:39:34134}
135
Alberto Cortésc347e972015-12-11 16:57:10136// calculte the history of a file "path", starting from commit "from", sorted by commit date.
Alberto Cortés48bf5bd2015-11-27 11:39:34137func (b *blame) fillRevs() error {
138 var err error
Máximo Cuadrosb543e4e2015-12-12 02:22:52139
140 b.revs, err = b.fRev.References(b.path)
Alberto Cortés48bf5bd2015-11-27 11:39:34141 if err != nil {
142 return err
143 }
144 return nil
145}
146
147// build graph of a file from its revision history
148func (b *blame) fillGraphAndData() error {
Máximo Cuadrosb543e4e2015-12-12 02:22:52149 b.graph = make([][]*Commit, len(b.revs))
Alberto Cortés48bf5bd2015-11-27 11:39:34150 b.data = make([]string, len(b.revs)) // file contents in all the revisions
151 // for every revision of the file, starting with the first
Alberto Cortésd643cea2015-11-16 02:20:01152 // one...
Alberto Cortésd643cea2015-11-16 02:20:01153 for i, rev := range b.revs {
154 // get the contents of the file
Alberto Cortés48bf5bd2015-11-27 11:39:34155 file, err := rev.File(b.path)
156 if err != nil {
157 return nil
Alberto Cortésd643cea2015-11-16 02:20:01158 }
Joshua Sjoding0d999e12016-02-25 06:40:30159 b.data[i], err = file.Contents()
160 if err != nil {
161 return err
162 }
Máximo Cuadrosb543e4e2015-12-12 02:22:52163 nLines := countLines(b.data[i])
Alberto Cortésd643cea2015-11-16 02:20:01164 // create a node for each line
Máximo Cuadrosb543e4e2015-12-12 02:22:52165 b.graph[i] = make([]*Commit, nLines)
Alberto Cortésd643cea2015-11-16 02:20:01166 // assign a commit to each node
167 // if this is the first revision, then the node is assigned to
168 // this first commit.
169 if i == 0 {
170 for j := 0; j < nLines; j++ {
Máximo Cuadrosb543e4e2015-12-12 02:22:52171 b.graph[i][j] = (*Commit)(b.revs[i])
Alberto Cortésd643cea2015-11-16 02:20:01172 }
173 } else {
174 // if this is not the first commit, then assign to the old
175 // commit or to the new one, depending on what the diff
176 // says.
177 b.assignOrigin(i, i-1)
178 }
179 }
Alberto Cortés48bf5bd2015-11-27 11:39:34180 return nil
181}
Alberto Cortésd643cea2015-11-16 02:20:01182
Alberto Cortés48bf5bd2015-11-27 11:39:34183// sliceGraph returns a slice of commits (one per line) for a particular
184// revision of a file (0=first revision).
Máximo Cuadrosb543e4e2015-12-12 02:22:52185func (b *blame) sliceGraph(i int) []*Commit {
Alberto Cortés48bf5bd2015-11-27 11:39:34186 fVs := b.graph[i]
Máximo Cuadrosb543e4e2015-12-12 02:22:52187 result := make([]*Commit, 0, len(fVs))
Alberto Cortésd643cea2015-11-16 02:20:01188 for _, v := range fVs {
Máximo Cuadrosb543e4e2015-12-12 02:22:52189 c := Commit(*v)
Alberto Cortésd643cea2015-11-16 02:20:01190 result = append(result, &c)
191 }
Alberto Cortés48bf5bd2015-11-27 11:39:34192 return result
Alberto Cortésd643cea2015-11-16 02:20:01193}
194
Alberto Cortésd643cea2015-11-16 02:20:01195// Assigns origin to vertexes in current (c) rev from data in its previous (p)
196// revision
197func (b *blame) assignOrigin(c, p int) {
198 // assign origin based on diff info
199 hunks := diff.Do(b.data[p], b.data[c])
200 sl := -1 // source line
201 dl := -1 // destination line
202 for h := range hunks {
Máximo Cuadrosb543e4e2015-12-12 02:22:52203 hLines := countLines(hunks[h].Text)
Alberto Cortésd643cea2015-11-16 02:20:01204 for hl := 0; hl < hLines; hl++ {
Alberto Cortésd643cea2015-11-16 02:20:01205 switch {
206 case hunks[h].Type == 0:
207 sl++
208 dl++
209 b.graph[c][dl] = b.graph[p][sl]
210 case hunks[h].Type == 1:
211 dl++
Máximo Cuadrosb543e4e2015-12-12 02:22:52212 b.graph[c][dl] = (*Commit)(b.revs[c])
Alberto Cortésd643cea2015-11-16 02:20:01213 case hunks[h].Type == -1:
214 sl++
215 default:
216 panic("unreachable")
217 }
218 }
219 }
220}
221
Alberto Cortésc347e972015-12-11 16:57:10222// GoString prints the results of a Blame using git-blame's style.
223func (b *blame) GoString() string {
Alberto Cortésd643cea2015-11-16 02:20:01224 var buf bytes.Buffer
225
Alberto Cortés48bf5bd2015-11-27 11:39:34226 file, err := b.fRev.File(b.path)
227 if err != nil {
Alberto Cortésd643cea2015-11-16 02:20:01228 panic("PrettyPrint: internal error in repo.Data")
229 }
Joshua Sjoding0d999e12016-02-25 06:40:30230 contents, err := file.Contents()
231 if err != nil {
232 panic("PrettyPrint: internal error in repo.Data")
233 }
Alberto Cortésd643cea2015-11-16 02:20:01234
235 lines := strings.Split(contents, "\n")
236 // max line number length
237 mlnl := len(fmt.Sprintf("%s", strconv.Itoa(len(lines))))
238 // max author length
239 mal := b.maxAuthorLength()
240 format := fmt.Sprintf("%%s (%%-%ds %%%dd) %%s\n",
241 mal, mlnl)
242
243 fVs := b.graph[len(b.graph)-1]
244 for ln, v := range fVs {
245 fmt.Fprintf(&buf, format, v.Hash.String()[:8],
246 prettyPrintAuthor(fVs[ln]), ln+1, lines[ln])
247 }
248 return buf.String()
249}
250
251// utility function to pretty print the author.
Máximo Cuadrosb543e4e2015-12-12 02:22:52252func prettyPrintAuthor(c *Commit) string {
Alberto Cortésd643cea2015-11-16 02:20:01253 return fmt.Sprintf("%s %s", c.Author.Name, c.Author.When.Format("2006-01-02"))
254}
255
256// utility function to calculate the number of runes needed
257// to print the longest author name in the blame of a file.
258func (b *blame) maxAuthorLength() int {
259 memo := make(map[core.Hash]struct{}, len(b.graph)-1)
260 fVs := b.graph[len(b.graph)-1]
261 m := 0
262 for ln := range fVs {
263 if _, ok := memo[fVs[ln].Hash]; ok {
264 continue
265 }
266 memo[fVs[ln].Hash] = struct{}{}
267 m = max(m, utf8.RuneCountInString(prettyPrintAuthor(fVs[ln])))
268 }
269 return m
270}
271
272func max(a, b int) int {
273 if a > b {
274 return a
275 }
276 return b
277}