blob: adb72d5745f1d9c4490aeba1fa9267ac00a6af66 [file] [log] [blame]
Máximo Cuadrosb543e4e2015-12-12 02:22:521package git
Alberto Cortésd643cea2015-11-16 02:20:012
3import (
4 "bytes"
Alberto Cortés48bf5bd2015-11-27 11:39:345 "errors"
Alberto Cortésd643cea2015-11-16 02:20:016 "fmt"
Alberto Cortésd643cea2015-11-16 02:20:017 "strconv"
8 "strings"
Shane Da Silva0c19e6b2018-02-19 17:19:009 "time"
Alberto Cortésd643cea2015-11-16 02:20:0110 "unicode/utf8"
11
Antonio Jesus Navarro Perez33e7c162017-03-07 11:58:1812 "gopkg.in/src-d/go-git.v4/plumbing"
13 "gopkg.in/src-d/go-git.v4/plumbing/object"
14 "gopkg.in/src-d/go-git.v4/utils/diff"
Alberto Cortésd643cea2015-11-16 02:20:0115)
16
Antonio Jesus Navarro Perezbe7e1e62017-03-02 09:48:2617// BlameResult represents the result of a Blame operation.
Santiago M. Mola0af572d2016-12-14 22:12:4418type BlameResult struct {
Antonio Jesus Navarro Perezbe7e1e62017-03-02 09:48:2619 // Path is the path of the File that we're blaming.
Antonio Jesus Navarro Perez90069152017-02-28 14:09:1520 Path string
Antonio Jesus Navarro Perezbe7e1e62017-03-02 09:48:2621 // Rev (Revision) is the hash of the specified Commit used to generate this result.
Antonio Jesus Navarro Perez90069152017-02-28 14:09:1522 Rev plumbing.Hash
Antonio Jesus Navarro Perezbe7e1e62017-03-02 09:48:2623 // Lines contains every line with its authorship.
Alberto Cortés4fe64a12017-01-19 10:17:5524 Lines []*Line
Máximo Cuadrosb543e4e2015-12-12 02:22:5225}
26
Antonio Jesus Navarro Pereze4081622017-03-02 11:49:5627// Blame returns a BlameResult with the information about the last author of
28// each line from file `path` at commit `c`.
Santiago M. Mola0af572d2016-12-14 22:12:4429func Blame(c *object.Commit, path string) (*BlameResult, error) {
Antonio Jesus Navarro Pereze4081622017-03-02 11:49:5630 // The file to blame is identified by the input arguments:
31 // commit and path. commit is a Commit object obtained from a Repository. Path
32 // represents a path to a specific file contained into the repository.
33 //
34 // Blaming a file is a two step process:
35 //
36 // 1. Create a linear history of the commits affecting a file. We use
37 // revlist.New for that.
38 //
39 // 2. Then build a graph with a node for every line in every file in
40 // the history of the file.
41 //
42 // Each node is assigned a commit: Start by the nodes in the first
43 // commit. Assign that commit as the creator of all its lines.
44 //
45 // Then jump to the nodes in the next commit, and calculate the diff
46 // between the two files. Newly created lines get
47 // assigned the new commit as its origin. Modified lines also get
48 // this new commit. Untouched lines retain the old commit.
49 //
50 // All this work is done in the assignOrigin function which holds all
51 // the internal relevant data in a "blame" struct, that is not
52 // exported.
53 //
Máximo Cuadrosa6197bd2017-01-31 22:09:2854 // TODO: ways to improve the efficiency of this function:
55 // 1. Improve revlist
56 // 2. Improve how to traverse the history (example a backward traversal will
57 // be much more efficient)
58 //
59 // TODO: ways to improve the function in general:
60 // 1. Add memoization between revlist and assign.
61 // 2. It is using much more memory than needed, see the TODOs below.
62
Alberto Cortésd643cea2015-11-16 02:20:0163 b := new(blame)
Máximo Cuadrosb543e4e2015-12-12 02:22:5264 b.fRev = c
Alberto Cortésd643cea2015-11-16 02:20:0165 b.path = path
66
Alberto Cortés48bf5bd2015-11-27 11:39:3467 // get all the file revisions
68 if err := b.fillRevs(); err != nil {
69 return nil, err
70 }
71
72 // calculate the line tracking graph and fill in
73 // file contents in data.
74 if err := b.fillGraphAndData(); err != nil {
75 return nil, err
76 }
77
78 file, err := b.fRev.File(b.path)
Alberto Cortésd643cea2015-11-16 02:20:0179 if err != nil {
80 return nil, err
81 }
Joshua Sjoding0d999e12016-02-25 06:40:3082 finalLines, err := file.Lines()
83 if err != nil {
84 return nil, err
85 }
Alberto Cortésd643cea2015-11-16 02:20:0186
Antonio Jesus Navarro Perezbe7e1e62017-03-02 09:48:2687 // Each node (line) holds the commit where it was introduced or
88 // last modified. To achieve that we use the FORWARD algorithm
89 // described in Zimmermann, et al. "Mining Version Archives for
90 // Co-changed Lines", in proceedings of the Mining Software
91 // Repositories workshop, Shanghai, May 22-23, 2006.
Alberto Cortés48bf5bd2015-11-27 11:39:3492 lines, err := newLines(finalLines, b.sliceGraph(len(b.graph)-1))
93 if err != nil {
94 return nil, err
95 }
Alberto Cortésd643cea2015-11-16 02:20:0196
Santiago M. Mola0af572d2016-12-14 22:12:4497 return &BlameResult{
Alberto Cortés48bf5bd2015-11-27 11:39:3498 Path: path,
Máximo Cuadrosb543e4e2015-12-12 02:22:5299 Rev: c.Hash,
Alberto Cortés48bf5bd2015-11-27 11:39:34100 Lines: lines,
101 }, nil
102}
Alberto Cortésd643cea2015-11-16 02:20:01103
Alberto Cortés4fe64a12017-01-19 10:17:55104// Line values represent the contents and author of a line in BlamedResult values.
105type Line struct {
Antonio Jesus Navarro Perezbe7e1e62017-03-02 09:48:26106 // Author is the email address of the last author that modified the line.
Antonio Jesus Navarro Perez90069152017-02-28 14:09:15107 Author string
108 // Text is the original text of the line.
109 Text string
Shane Da Silva0c19e6b2018-02-19 17:19:00110 // Date is when the original text of the line was introduced
111 Date time.Time
Shane Da Silvaa3cf1232018-03-28 00:55:08112 // Hash is the commit hash that introduced the original line
113 Hash plumbing.Hash
Alberto Cortés48bf5bd2015-11-27 11:39:34114}
115
Shane Da Silvaa3cf1232018-03-28 00:55:08116func newLine(author, text string, date time.Time, hash plumbing.Hash) *Line {
Alberto Cortés4fe64a12017-01-19 10:17:55117 return &Line{
118 Author: author,
119 Text: text,
Shane Da Silvaa3cf1232018-03-28 00:55:08120 Hash: hash,
Shane Da Silva0c19e6b2018-02-19 17:19:00121 Date: date,
Alberto Cortés48bf5bd2015-11-27 11:39:34122 }
123}
124
Alberto Cortés4fe64a12017-01-19 10:17:55125func newLines(contents []string, commits []*object.Commit) ([]*Line, error) {
Máximo Cuadrosc4be0442018-10-15 22:12:10126 lcontents := len(contents)
127 lcommits := len(commits)
128
129 if lcontents != lcommits {
130 if lcontents == lcommits-1 && contents[lcontents-1] != "\n" {
131 contents = append(contents, "\n")
132 } else {
133 return nil, errors.New("contents and commits have different length")
134 }
Alberto Cortés48bf5bd2015-11-27 11:39:34135 }
Máximo Cuadrosc4be0442018-10-15 22:12:10136
137 result := make([]*Line, 0, lcontents)
Alberto Cortés48bf5bd2015-11-27 11:39:34138 for i := range contents {
Máximo Cuadrosc4be0442018-10-15 22:12:10139 result = append(result, newLine(
140 commits[i].Author.Email, contents[i],
141 commits[i].Author.When, commits[i].Hash,
142 ))
Alberto Cortés48bf5bd2015-11-27 11:39:34143 }
Máximo Cuadrosc4be0442018-10-15 22:12:10144
Alberto Cortés48bf5bd2015-11-27 11:39:34145 return result, nil
146}
147
148// this struct is internally used by the blame function to hold its
149// inputs, outputs and state.
150type blame struct {
Antonio Jesus Navarro Perez9251df12017-02-28 11:17:13151 // the path of the file to blame
152 path string
153 // the commit of the final revision of the file to blame
154 fRev *object.Commit
155 // the chain of revisions affecting the the file to blame
156 revs []*object.Commit
157 // the contents of the file across all its revisions
158 data []string
159 // the graph of the lines in the file across all the revisions
160 graph [][]*object.Commit
Alberto Cortés48bf5bd2015-11-27 11:39:34161}
162
Antonio Jesus Navarro Perezbe8d1942017-04-10 14:48:40163// calculate the history of a file "path", starting from commit "from", sorted by commit date.
Alberto Cortés48bf5bd2015-11-27 11:39:34164func (b *blame) fillRevs() error {
165 var err error
Máximo Cuadrosb543e4e2015-12-12 02:22:52166
Antonio Jesus Navarro Perezbe8d1942017-04-10 14:48:40167 b.revs, err = references(b.fRev, b.path)
ferhat elmas18e6f9d2017-11-29 01:24:31168 return err
Alberto Cortés48bf5bd2015-11-27 11:39:34169}
170
171// build graph of a file from its revision history
172func (b *blame) fillGraphAndData() error {
Antonio Jesus Navarro Perez9251df12017-02-28 11:17:13173 //TODO: not all commits are needed, only the current rev and the prev
Santiago M. Mola0af572d2016-12-14 22:12:44174 b.graph = make([][]*object.Commit, len(b.revs))
Alberto Cortés48bf5bd2015-11-27 11:39:34175 b.data = make([]string, len(b.revs)) // file contents in all the revisions
176 // for every revision of the file, starting with the first
Alberto Cortésd643cea2015-11-16 02:20:01177 // one...
Alberto Cortésd643cea2015-11-16 02:20:01178 for i, rev := range b.revs {
179 // get the contents of the file
Alberto Cortés48bf5bd2015-11-27 11:39:34180 file, err := rev.File(b.path)
181 if err != nil {
182 return nil
Alberto Cortésd643cea2015-11-16 02:20:01183 }
Joshua Sjoding0d999e12016-02-25 06:40:30184 b.data[i], err = file.Contents()
185 if err != nil {
186 return err
187 }
Máximo Cuadrosb543e4e2015-12-12 02:22:52188 nLines := countLines(b.data[i])
Alberto Cortésd643cea2015-11-16 02:20:01189 // create a node for each line
Santiago M. Mola0af572d2016-12-14 22:12:44190 b.graph[i] = make([]*object.Commit, nLines)
Alberto Cortésd643cea2015-11-16 02:20:01191 // assign a commit to each node
192 // if this is the first revision, then the node is assigned to
193 // this first commit.
194 if i == 0 {
195 for j := 0; j < nLines; j++ {
Santiago M. Mola0af572d2016-12-14 22:12:44196 b.graph[i][j] = (*object.Commit)(b.revs[i])
Alberto Cortésd643cea2015-11-16 02:20:01197 }
198 } else {
199 // if this is not the first commit, then assign to the old
200 // commit or to the new one, depending on what the diff
201 // says.
202 b.assignOrigin(i, i-1)
203 }
204 }
Alberto Cortés48bf5bd2015-11-27 11:39:34205 return nil
206}
Alberto Cortésd643cea2015-11-16 02:20:01207
Alberto Cortés48bf5bd2015-11-27 11:39:34208// sliceGraph returns a slice of commits (one per line) for a particular
209// revision of a file (0=first revision).
Santiago M. Mola0af572d2016-12-14 22:12:44210func (b *blame) sliceGraph(i int) []*object.Commit {
Alberto Cortés48bf5bd2015-11-27 11:39:34211 fVs := b.graph[i]
Santiago M. Mola0af572d2016-12-14 22:12:44212 result := make([]*object.Commit, 0, len(fVs))
Alberto Cortésd643cea2015-11-16 02:20:01213 for _, v := range fVs {
Santiago M. Mola0af572d2016-12-14 22:12:44214 c := object.Commit(*v)
Alberto Cortésd643cea2015-11-16 02:20:01215 result = append(result, &c)
216 }
Alberto Cortés48bf5bd2015-11-27 11:39:34217 return result
Alberto Cortésd643cea2015-11-16 02:20:01218}
219
Alberto Cortésd643cea2015-11-16 02:20:01220// Assigns origin to vertexes in current (c) rev from data in its previous (p)
221// revision
222func (b *blame) assignOrigin(c, p int) {
223 // assign origin based on diff info
224 hunks := diff.Do(b.data[p], b.data[c])
225 sl := -1 // source line
226 dl := -1 // destination line
227 for h := range hunks {
Máximo Cuadrosb543e4e2015-12-12 02:22:52228 hLines := countLines(hunks[h].Text)
Alberto Cortésd643cea2015-11-16 02:20:01229 for hl := 0; hl < hLines; hl++ {
Alberto Cortésd643cea2015-11-16 02:20:01230 switch {
231 case hunks[h].Type == 0:
232 sl++
233 dl++
234 b.graph[c][dl] = b.graph[p][sl]
235 case hunks[h].Type == 1:
236 dl++
Santiago M. Mola0af572d2016-12-14 22:12:44237 b.graph[c][dl] = (*object.Commit)(b.revs[c])
Alberto Cortésd643cea2015-11-16 02:20:01238 case hunks[h].Type == -1:
239 sl++
240 default:
241 panic("unreachable")
242 }
243 }
244 }
245}
246
Alberto Cortésc347e972015-12-11 16:57:10247// GoString prints the results of a Blame using git-blame's style.
248func (b *blame) GoString() string {
Alberto Cortésd643cea2015-11-16 02:20:01249 var buf bytes.Buffer
250
Alberto Cortés48bf5bd2015-11-27 11:39:34251 file, err := b.fRev.File(b.path)
252 if err != nil {
Alberto Cortésd643cea2015-11-16 02:20:01253 panic("PrettyPrint: internal error in repo.Data")
254 }
Joshua Sjoding0d999e12016-02-25 06:40:30255 contents, err := file.Contents()
256 if err != nil {
257 panic("PrettyPrint: internal error in repo.Data")
258 }
Alberto Cortésd643cea2015-11-16 02:20:01259
260 lines := strings.Split(contents, "\n")
261 // max line number length
ferhat elmas18e6f9d2017-11-29 01:24:31262 mlnl := len(strconv.Itoa(len(lines)))
Alberto Cortésd643cea2015-11-16 02:20:01263 // max author length
264 mal := b.maxAuthorLength()
265 format := fmt.Sprintf("%%s (%%-%ds %%%dd) %%s\n",
266 mal, mlnl)
267
268 fVs := b.graph[len(b.graph)-1]
269 for ln, v := range fVs {
270 fmt.Fprintf(&buf, format, v.Hash.String()[:8],
271 prettyPrintAuthor(fVs[ln]), ln+1, lines[ln])
272 }
273 return buf.String()
274}
275
276// utility function to pretty print the author.
Santiago M. Mola0af572d2016-12-14 22:12:44277func prettyPrintAuthor(c *object.Commit) string {
Alberto Cortésd643cea2015-11-16 02:20:01278 return fmt.Sprintf("%s %s", c.Author.Name, c.Author.When.Format("2006-01-02"))
279}
280
281// utility function to calculate the number of runes needed
282// to print the longest author name in the blame of a file.
283func (b *blame) maxAuthorLength() int {
Máximo Cuadrosac095bb2016-11-08 22:46:38284 memo := make(map[plumbing.Hash]struct{}, len(b.graph)-1)
Alberto Cortésd643cea2015-11-16 02:20:01285 fVs := b.graph[len(b.graph)-1]
286 m := 0
287 for ln := range fVs {
288 if _, ok := memo[fVs[ln].Hash]; ok {
289 continue
290 }
291 memo[fVs[ln].Hash] = struct{}{}
292 m = max(m, utf8.RuneCountInString(prettyPrintAuthor(fVs[ln])))
293 }
294 return m
295}
296
297func max(a, b int) int {
298 if a > b {
299 return a
300 }
301 return b
302}