| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 1 | package git |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 2 | |
| 3 | import ( |
| 4 | "bytes" |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 5 | "errors" |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 6 | "fmt" |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 7 | "strconv" |
| 8 | "strings" |
| Shane Da Silva | 0c19e6b | 2018-02-19 17:19:00 | [diff] [blame] | 9 | "time" |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 10 | "unicode/utf8" |
| 11 | |
| Antonio Jesus Navarro Perez | 33e7c16 | 2017-03-07 11:58:18 | [diff] [blame] | 12 | "gopkg.in/src-d/go-git.v4/plumbing" |
| 13 | "gopkg.in/src-d/go-git.v4/plumbing/object" |
| 14 | "gopkg.in/src-d/go-git.v4/utils/diff" |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 15 | ) |
| 16 | |
| Antonio Jesus Navarro Perez | be7e1e6 | 2017-03-02 09:48:26 | [diff] [blame] | 17 | // BlameResult represents the result of a Blame operation. |
| Santiago M. Mola | 0af572d | 2016-12-14 22:12:44 | [diff] [blame] | 18 | type BlameResult struct { |
| Antonio Jesus Navarro Perez | be7e1e6 | 2017-03-02 09:48:26 | [diff] [blame] | 19 | // Path is the path of the File that we're blaming. |
| Antonio Jesus Navarro Perez | 9006915 | 2017-02-28 14:09:15 | [diff] [blame] | 20 | Path string |
| Antonio Jesus Navarro Perez | be7e1e6 | 2017-03-02 09:48:26 | [diff] [blame] | 21 | // Rev (Revision) is the hash of the specified Commit used to generate this result. |
| Antonio Jesus Navarro Perez | 9006915 | 2017-02-28 14:09:15 | [diff] [blame] | 22 | Rev plumbing.Hash |
| Antonio Jesus Navarro Perez | be7e1e6 | 2017-03-02 09:48:26 | [diff] [blame] | 23 | // Lines contains every line with its authorship. |
| Alberto Cortés | 4fe64a1 | 2017-01-19 10:17:55 | [diff] [blame] | 24 | Lines []*Line |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 25 | } |
| 26 | |
| Antonio Jesus Navarro Perez | e408162 | 2017-03-02 11:49:56 | [diff] [blame] | 27 | // Blame returns a BlameResult with the information about the last author of |
| 28 | // each line from file `path` at commit `c`. |
| Santiago M. Mola | 0af572d | 2016-12-14 22:12:44 | [diff] [blame] | 29 | func Blame(c *object.Commit, path string) (*BlameResult, error) { |
| Antonio Jesus Navarro Perez | e408162 | 2017-03-02 11:49:56 | [diff] [blame] | 30 | // The file to blame is identified by the input arguments: |
| 31 | // commit and path. commit is a Commit object obtained from a Repository. Path |
| 32 | // represents a path to a specific file contained into the repository. |
| 33 | // |
| 34 | // Blaming a file is a two step process: |
| 35 | // |
| 36 | // 1. Create a linear history of the commits affecting a file. We use |
| 37 | // revlist.New for that. |
| 38 | // |
| 39 | // 2. Then build a graph with a node for every line in every file in |
| 40 | // the history of the file. |
| 41 | // |
| 42 | // Each node is assigned a commit: Start by the nodes in the first |
| 43 | // commit. Assign that commit as the creator of all its lines. |
| 44 | // |
| 45 | // Then jump to the nodes in the next commit, and calculate the diff |
| 46 | // between the two files. Newly created lines get |
| 47 | // assigned the new commit as its origin. Modified lines also get |
| 48 | // this new commit. Untouched lines retain the old commit. |
| 49 | // |
| 50 | // All this work is done in the assignOrigin function which holds all |
| 51 | // the internal relevant data in a "blame" struct, that is not |
| 52 | // exported. |
| 53 | // |
| Máximo Cuadros | a6197bd | 2017-01-31 22:09:28 | [diff] [blame] | 54 | // TODO: ways to improve the efficiency of this function: |
| 55 | // 1. Improve revlist |
| 56 | // 2. Improve how to traverse the history (example a backward traversal will |
| 57 | // be much more efficient) |
| 58 | // |
| 59 | // TODO: ways to improve the function in general: |
| 60 | // 1. Add memoization between revlist and assign. |
| 61 | // 2. It is using much more memory than needed, see the TODOs below. |
| 62 | |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 63 | b := new(blame) |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 64 | b.fRev = c |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 65 | b.path = path |
| 66 | |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 67 | // get all the file revisions |
| 68 | if err := b.fillRevs(); err != nil { |
| 69 | return nil, err |
| 70 | } |
| 71 | |
| 72 | // calculate the line tracking graph and fill in |
| 73 | // file contents in data. |
| 74 | if err := b.fillGraphAndData(); err != nil { |
| 75 | return nil, err |
| 76 | } |
| 77 | |
| 78 | file, err := b.fRev.File(b.path) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 79 | if err != nil { |
| 80 | return nil, err |
| 81 | } |
| Joshua Sjoding | 0d999e1 | 2016-02-25 06:40:30 | [diff] [blame] | 82 | finalLines, err := file.Lines() |
| 83 | if err != nil { |
| 84 | return nil, err |
| 85 | } |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 86 | |
| Antonio Jesus Navarro Perez | be7e1e6 | 2017-03-02 09:48:26 | [diff] [blame] | 87 | // Each node (line) holds the commit where it was introduced or |
| 88 | // last modified. To achieve that we use the FORWARD algorithm |
| 89 | // described in Zimmermann, et al. "Mining Version Archives for |
| 90 | // Co-changed Lines", in proceedings of the Mining Software |
| 91 | // Repositories workshop, Shanghai, May 22-23, 2006. |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 92 | lines, err := newLines(finalLines, b.sliceGraph(len(b.graph)-1)) |
| 93 | if err != nil { |
| 94 | return nil, err |
| 95 | } |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 96 | |
| Santiago M. Mola | 0af572d | 2016-12-14 22:12:44 | [diff] [blame] | 97 | return &BlameResult{ |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 98 | Path: path, |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 99 | Rev: c.Hash, |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 100 | Lines: lines, |
| 101 | }, nil |
| 102 | } |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 103 | |
| Alberto Cortés | 4fe64a1 | 2017-01-19 10:17:55 | [diff] [blame] | 104 | // Line values represent the contents and author of a line in BlamedResult values. |
| 105 | type Line struct { |
| Antonio Jesus Navarro Perez | be7e1e6 | 2017-03-02 09:48:26 | [diff] [blame] | 106 | // Author is the email address of the last author that modified the line. |
| Antonio Jesus Navarro Perez | 9006915 | 2017-02-28 14:09:15 | [diff] [blame] | 107 | Author string |
| 108 | // Text is the original text of the line. |
| 109 | Text string |
| Shane Da Silva | 0c19e6b | 2018-02-19 17:19:00 | [diff] [blame] | 110 | // Date is when the original text of the line was introduced |
| 111 | Date time.Time |
| Shane Da Silva | a3cf123 | 2018-03-28 00:55:08 | [diff] [blame] | 112 | // Hash is the commit hash that introduced the original line |
| 113 | Hash plumbing.Hash |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 114 | } |
| 115 | |
| Shane Da Silva | a3cf123 | 2018-03-28 00:55:08 | [diff] [blame] | 116 | func newLine(author, text string, date time.Time, hash plumbing.Hash) *Line { |
| Alberto Cortés | 4fe64a1 | 2017-01-19 10:17:55 | [diff] [blame] | 117 | return &Line{ |
| 118 | Author: author, |
| 119 | Text: text, |
| Shane Da Silva | a3cf123 | 2018-03-28 00:55:08 | [diff] [blame] | 120 | Hash: hash, |
| Shane Da Silva | 0c19e6b | 2018-02-19 17:19:00 | [diff] [blame] | 121 | Date: date, |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 122 | } |
| 123 | } |
| 124 | |
| Alberto Cortés | 4fe64a1 | 2017-01-19 10:17:55 | [diff] [blame] | 125 | func newLines(contents []string, commits []*object.Commit) ([]*Line, error) { |
| Máximo Cuadros | c4be044 | 2018-10-15 22:12:10 | [diff] [blame] | 126 | lcontents := len(contents) |
| 127 | lcommits := len(commits) |
| 128 | |
| 129 | if lcontents != lcommits { |
| 130 | if lcontents == lcommits-1 && contents[lcontents-1] != "\n" { |
| 131 | contents = append(contents, "\n") |
| 132 | } else { |
| 133 | return nil, errors.New("contents and commits have different length") |
| 134 | } |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 135 | } |
| Máximo Cuadros | c4be044 | 2018-10-15 22:12:10 | [diff] [blame] | 136 | |
| 137 | result := make([]*Line, 0, lcontents) |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 138 | for i := range contents { |
| Máximo Cuadros | c4be044 | 2018-10-15 22:12:10 | [diff] [blame] | 139 | result = append(result, newLine( |
| 140 | commits[i].Author.Email, contents[i], |
| 141 | commits[i].Author.When, commits[i].Hash, |
| 142 | )) |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 143 | } |
| Máximo Cuadros | c4be044 | 2018-10-15 22:12:10 | [diff] [blame] | 144 | |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 145 | return result, nil |
| 146 | } |
| 147 | |
| 148 | // this struct is internally used by the blame function to hold its |
| 149 | // inputs, outputs and state. |
| 150 | type blame struct { |
| Antonio Jesus Navarro Perez | 9251df1 | 2017-02-28 11:17:13 | [diff] [blame] | 151 | // the path of the file to blame |
| 152 | path string |
| 153 | // the commit of the final revision of the file to blame |
| 154 | fRev *object.Commit |
| 155 | // the chain of revisions affecting the the file to blame |
| 156 | revs []*object.Commit |
| 157 | // the contents of the file across all its revisions |
| 158 | data []string |
| 159 | // the graph of the lines in the file across all the revisions |
| 160 | graph [][]*object.Commit |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 161 | } |
| 162 | |
| Antonio Jesus Navarro Perez | be8d194 | 2017-04-10 14:48:40 | [diff] [blame] | 163 | // calculate the history of a file "path", starting from commit "from", sorted by commit date. |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 164 | func (b *blame) fillRevs() error { |
| 165 | var err error |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 166 | |
| Antonio Jesus Navarro Perez | be8d194 | 2017-04-10 14:48:40 | [diff] [blame] | 167 | b.revs, err = references(b.fRev, b.path) |
| ferhat elmas | 18e6f9d | 2017-11-29 01:24:31 | [diff] [blame] | 168 | return err |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 169 | } |
| 170 | |
| 171 | // build graph of a file from its revision history |
| 172 | func (b *blame) fillGraphAndData() error { |
| Antonio Jesus Navarro Perez | 9251df1 | 2017-02-28 11:17:13 | [diff] [blame] | 173 | //TODO: not all commits are needed, only the current rev and the prev |
| Santiago M. Mola | 0af572d | 2016-12-14 22:12:44 | [diff] [blame] | 174 | b.graph = make([][]*object.Commit, len(b.revs)) |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 175 | b.data = make([]string, len(b.revs)) // file contents in all the revisions |
| 176 | // for every revision of the file, starting with the first |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 177 | // one... |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 178 | for i, rev := range b.revs { |
| 179 | // get the contents of the file |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 180 | file, err := rev.File(b.path) |
| 181 | if err != nil { |
| 182 | return nil |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 183 | } |
| Joshua Sjoding | 0d999e1 | 2016-02-25 06:40:30 | [diff] [blame] | 184 | b.data[i], err = file.Contents() |
| 185 | if err != nil { |
| 186 | return err |
| 187 | } |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 188 | nLines := countLines(b.data[i]) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 189 | // create a node for each line |
| Santiago M. Mola | 0af572d | 2016-12-14 22:12:44 | [diff] [blame] | 190 | b.graph[i] = make([]*object.Commit, nLines) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 191 | // assign a commit to each node |
| 192 | // if this is the first revision, then the node is assigned to |
| 193 | // this first commit. |
| 194 | if i == 0 { |
| 195 | for j := 0; j < nLines; j++ { |
| Santiago M. Mola | 0af572d | 2016-12-14 22:12:44 | [diff] [blame] | 196 | b.graph[i][j] = (*object.Commit)(b.revs[i]) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 197 | } |
| 198 | } else { |
| 199 | // if this is not the first commit, then assign to the old |
| 200 | // commit or to the new one, depending on what the diff |
| 201 | // says. |
| 202 | b.assignOrigin(i, i-1) |
| 203 | } |
| 204 | } |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 205 | return nil |
| 206 | } |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 207 | |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 208 | // sliceGraph returns a slice of commits (one per line) for a particular |
| 209 | // revision of a file (0=first revision). |
| Santiago M. Mola | 0af572d | 2016-12-14 22:12:44 | [diff] [blame] | 210 | func (b *blame) sliceGraph(i int) []*object.Commit { |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 211 | fVs := b.graph[i] |
| Santiago M. Mola | 0af572d | 2016-12-14 22:12:44 | [diff] [blame] | 212 | result := make([]*object.Commit, 0, len(fVs)) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 213 | for _, v := range fVs { |
| Santiago M. Mola | 0af572d | 2016-12-14 22:12:44 | [diff] [blame] | 214 | c := object.Commit(*v) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 215 | result = append(result, &c) |
| 216 | } |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 217 | return result |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 218 | } |
| 219 | |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 220 | // Assigns origin to vertexes in current (c) rev from data in its previous (p) |
| 221 | // revision |
| 222 | func (b *blame) assignOrigin(c, p int) { |
| 223 | // assign origin based on diff info |
| 224 | hunks := diff.Do(b.data[p], b.data[c]) |
| 225 | sl := -1 // source line |
| 226 | dl := -1 // destination line |
| 227 | for h := range hunks { |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 228 | hLines := countLines(hunks[h].Text) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 229 | for hl := 0; hl < hLines; hl++ { |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 230 | switch { |
| 231 | case hunks[h].Type == 0: |
| 232 | sl++ |
| 233 | dl++ |
| 234 | b.graph[c][dl] = b.graph[p][sl] |
| 235 | case hunks[h].Type == 1: |
| 236 | dl++ |
| Santiago M. Mola | 0af572d | 2016-12-14 22:12:44 | [diff] [blame] | 237 | b.graph[c][dl] = (*object.Commit)(b.revs[c]) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 238 | case hunks[h].Type == -1: |
| 239 | sl++ |
| 240 | default: |
| 241 | panic("unreachable") |
| 242 | } |
| 243 | } |
| 244 | } |
| 245 | } |
| 246 | |
| Alberto Cortés | c347e97 | 2015-12-11 16:57:10 | [diff] [blame] | 247 | // GoString prints the results of a Blame using git-blame's style. |
| 248 | func (b *blame) GoString() string { |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 249 | var buf bytes.Buffer |
| 250 | |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 251 | file, err := b.fRev.File(b.path) |
| 252 | if err != nil { |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 253 | panic("PrettyPrint: internal error in repo.Data") |
| 254 | } |
| Joshua Sjoding | 0d999e1 | 2016-02-25 06:40:30 | [diff] [blame] | 255 | contents, err := file.Contents() |
| 256 | if err != nil { |
| 257 | panic("PrettyPrint: internal error in repo.Data") |
| 258 | } |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 259 | |
| 260 | lines := strings.Split(contents, "\n") |
| 261 | // max line number length |
| ferhat elmas | 18e6f9d | 2017-11-29 01:24:31 | [diff] [blame] | 262 | mlnl := len(strconv.Itoa(len(lines))) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 263 | // max author length |
| 264 | mal := b.maxAuthorLength() |
| 265 | format := fmt.Sprintf("%%s (%%-%ds %%%dd) %%s\n", |
| 266 | mal, mlnl) |
| 267 | |
| 268 | fVs := b.graph[len(b.graph)-1] |
| 269 | for ln, v := range fVs { |
| 270 | fmt.Fprintf(&buf, format, v.Hash.String()[:8], |
| 271 | prettyPrintAuthor(fVs[ln]), ln+1, lines[ln]) |
| 272 | } |
| 273 | return buf.String() |
| 274 | } |
| 275 | |
| 276 | // utility function to pretty print the author. |
| Santiago M. Mola | 0af572d | 2016-12-14 22:12:44 | [diff] [blame] | 277 | func prettyPrintAuthor(c *object.Commit) string { |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 278 | return fmt.Sprintf("%s %s", c.Author.Name, c.Author.When.Format("2006-01-02")) |
| 279 | } |
| 280 | |
| 281 | // utility function to calculate the number of runes needed |
| 282 | // to print the longest author name in the blame of a file. |
| 283 | func (b *blame) maxAuthorLength() int { |
| Máximo Cuadros | ac095bb | 2016-11-08 22:46:38 | [diff] [blame] | 284 | memo := make(map[plumbing.Hash]struct{}, len(b.graph)-1) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 285 | fVs := b.graph[len(b.graph)-1] |
| 286 | m := 0 |
| 287 | for ln := range fVs { |
| 288 | if _, ok := memo[fVs[ln].Hash]; ok { |
| 289 | continue |
| 290 | } |
| 291 | memo[fVs[ln].Hash] = struct{}{} |
| 292 | m = max(m, utf8.RuneCountInString(prettyPrintAuthor(fVs[ln]))) |
| 293 | } |
| 294 | return m |
| 295 | } |
| 296 | |
| 297 | func max(a, b int) int { |
| 298 | if a > b { |
| 299 | return a |
| 300 | } |
| 301 | return b |
| 302 | } |