| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 1 | package git |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 2 | |
| 3 | import ( |
| 4 | "bytes" |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 5 | "errors" |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 6 | "fmt" |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 7 | "strconv" |
| 8 | "strings" |
| 9 | "unicode/utf8" |
| 10 | |
| Máximo Cuadros | 6b0a598 | 2016-02-17 11:20:56 | [diff] [blame] | 11 | "gopkg.in/src-d/go-git.v3/core" |
| 12 | "gopkg.in/src-d/go-git.v3/diff" |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 13 | ) |
| 14 | |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 15 | type Blame struct { |
| 16 | Path string |
| 17 | Rev core.Hash |
| 18 | Lines []*line |
| 19 | } |
| 20 | |
| 21 | // Blame returns the last commit that modified each line of a file in a |
| 22 | // repository. |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 23 | // |
| 24 | // The file to blame is identified by the input arguments: repo, commit and path. |
| 25 | // The output is a slice of commits, one for each line in the file. |
| 26 | // |
| 27 | // Blaming a file is a two step process: |
| 28 | // |
| 29 | // 1. Create a linear history of the commits affecting a file. We use |
| Máximo Cuadros | 1931dfb | 2016-02-16 16:31:09 | [diff] [blame] | 30 | // revlist.New for that. |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 31 | // |
| 32 | // 2. Then build a graph with a node for every line in every file in |
| Máximo Cuadros | 1931dfb | 2016-02-16 16:31:09 | [diff] [blame] | 33 | // the history of the file. |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 34 | // |
| Máximo Cuadros | 1931dfb | 2016-02-16 16:31:09 | [diff] [blame] | 35 | // Each node (line) holds the commit where it was introduced or |
| 36 | // last modified. To achieve that we use the FORWARD algorithm |
| 37 | // described in Zimmermann, et al. "Mining Version Archives for |
| 38 | // Co-changed Lines", in proceedings of the Mining Software |
| 39 | // Repositories workshop, Shanghai, May 22-23, 2006. |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 40 | // |
| Máximo Cuadros | 08f9e70 | 2016-05-19 11:28:17 | [diff] [blame] | 41 | // Each node is assigned a commit: Start by the nodes in the first |
| Máximo Cuadros | 1931dfb | 2016-02-16 16:31:09 | [diff] [blame] | 42 | // commit. Assign that commit as the creator of all its lines. |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 43 | // |
| Máximo Cuadros | 1931dfb | 2016-02-16 16:31:09 | [diff] [blame] | 44 | // Then jump to the nodes in the next commit, and calculate the diff |
| 45 | // between the two files. Newly created lines get |
| 46 | // assigned the new commit as its origin. Modified lines also get |
| 47 | // this new commit. Untouched lines retain the old commit. |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 48 | // |
| Máximo Cuadros | 1931dfb | 2016-02-16 16:31:09 | [diff] [blame] | 49 | // All this work is done in the assignOrigin function which holds all |
| 50 | // the internal relevant data in a "blame" struct, that is not |
| 51 | // exported. |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 52 | // |
| Máximo Cuadros | 1931dfb | 2016-02-16 16:31:09 | [diff] [blame] | 53 | // TODO: ways to improve the efficiency of this function: |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 54 | // |
| Máximo Cuadros | 1931dfb | 2016-02-16 16:31:09 | [diff] [blame] | 55 | // 1. Improve revlist |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 56 | // |
| Máximo Cuadros | 1931dfb | 2016-02-16 16:31:09 | [diff] [blame] | 57 | // 2. Improve how to traverse the history (example a backward |
| 58 | // traversal will be much more efficient) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 59 | // |
| Máximo Cuadros | 1931dfb | 2016-02-16 16:31:09 | [diff] [blame] | 60 | // TODO: ways to improve the function in general: |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 61 | // |
| Máximo Cuadros | 1931dfb | 2016-02-16 16:31:09 | [diff] [blame] | 62 | // 1. Add memoization between revlist and assign. |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 63 | // |
| Máximo Cuadros | 1931dfb | 2016-02-16 16:31:09 | [diff] [blame] | 64 | // 2. It is using much more memory than needed, see the TODOs below. |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 65 | func (c *Commit) Blame(path string) (*Blame, error) { |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 66 | b := new(blame) |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 67 | b.fRev = c |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 68 | b.path = path |
| 69 | |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 70 | // get all the file revisions |
| 71 | if err := b.fillRevs(); err != nil { |
| 72 | return nil, err |
| 73 | } |
| 74 | |
| 75 | // calculate the line tracking graph and fill in |
| 76 | // file contents in data. |
| 77 | if err := b.fillGraphAndData(); err != nil { |
| 78 | return nil, err |
| 79 | } |
| 80 | |
| 81 | file, err := b.fRev.File(b.path) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 82 | if err != nil { |
| 83 | return nil, err |
| 84 | } |
| Joshua Sjoding | 0d999e1 | 2016-02-25 06:40:30 | [diff] [blame] | 85 | finalLines, err := file.Lines() |
| 86 | if err != nil { |
| 87 | return nil, err |
| 88 | } |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 89 | |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 90 | lines, err := newLines(finalLines, b.sliceGraph(len(b.graph)-1)) |
| 91 | if err != nil { |
| 92 | return nil, err |
| 93 | } |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 94 | |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 95 | return &Blame{ |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 96 | Path: path, |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 97 | Rev: c.Hash, |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 98 | Lines: lines, |
| 99 | }, nil |
| 100 | } |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 101 | |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 102 | type line struct { |
| 103 | author string |
| 104 | text string |
| 105 | } |
| 106 | |
| 107 | func newLine(author, text string) *line { |
| 108 | return &line{ |
| 109 | author: author, |
| 110 | text: text, |
| 111 | } |
| 112 | } |
| 113 | |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 114 | func newLines(contents []string, commits []*Commit) ([]*line, error) { |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 115 | if len(contents) != len(commits) { |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 116 | return nil, errors.New("contents and commits have different length") |
| 117 | } |
| 118 | result := make([]*line, 0, len(contents)) |
| 119 | for i := range contents { |
| 120 | l := newLine(commits[i].Author.Email, contents[i]) |
| 121 | result = append(result, l) |
| 122 | } |
| 123 | return result, nil |
| 124 | } |
| 125 | |
| 126 | // this struct is internally used by the blame function to hold its |
| 127 | // inputs, outputs and state. |
| 128 | type blame struct { |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 129 | path string // the path of the file to blame |
| 130 | fRev *Commit // the commit of the final revision of the file to blame |
| 131 | revs []*Commit // the chain of revisions affecting the the file to blame |
| 132 | data []string // the contents of the file across all its revisions |
| 133 | graph [][]*Commit // the graph of the lines in the file across all the revisions TODO: not all commits are needed, only the current rev and the prev |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 134 | } |
| 135 | |
| Alberto Cortés | c347e97 | 2015-12-11 16:57:10 | [diff] [blame] | 136 | // calculte the history of a file "path", starting from commit "from", sorted by commit date. |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 137 | func (b *blame) fillRevs() error { |
| 138 | var err error |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 139 | |
| 140 | b.revs, err = b.fRev.References(b.path) |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 141 | if err != nil { |
| 142 | return err |
| 143 | } |
| 144 | return nil |
| 145 | } |
| 146 | |
| 147 | // build graph of a file from its revision history |
| 148 | func (b *blame) fillGraphAndData() error { |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 149 | b.graph = make([][]*Commit, len(b.revs)) |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 150 | b.data = make([]string, len(b.revs)) // file contents in all the revisions |
| 151 | // for every revision of the file, starting with the first |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 152 | // one... |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 153 | for i, rev := range b.revs { |
| 154 | // get the contents of the file |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 155 | file, err := rev.File(b.path) |
| 156 | if err != nil { |
| 157 | return nil |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 158 | } |
| Joshua Sjoding | 0d999e1 | 2016-02-25 06:40:30 | [diff] [blame] | 159 | b.data[i], err = file.Contents() |
| 160 | if err != nil { |
| 161 | return err |
| 162 | } |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 163 | nLines := countLines(b.data[i]) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 164 | // create a node for each line |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 165 | b.graph[i] = make([]*Commit, nLines) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 166 | // assign a commit to each node |
| 167 | // if this is the first revision, then the node is assigned to |
| 168 | // this first commit. |
| 169 | if i == 0 { |
| 170 | for j := 0; j < nLines; j++ { |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 171 | b.graph[i][j] = (*Commit)(b.revs[i]) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 172 | } |
| 173 | } else { |
| 174 | // if this is not the first commit, then assign to the old |
| 175 | // commit or to the new one, depending on what the diff |
| 176 | // says. |
| 177 | b.assignOrigin(i, i-1) |
| 178 | } |
| 179 | } |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 180 | return nil |
| 181 | } |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 182 | |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 183 | // sliceGraph returns a slice of commits (one per line) for a particular |
| 184 | // revision of a file (0=first revision). |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 185 | func (b *blame) sliceGraph(i int) []*Commit { |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 186 | fVs := b.graph[i] |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 187 | result := make([]*Commit, 0, len(fVs)) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 188 | for _, v := range fVs { |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 189 | c := Commit(*v) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 190 | result = append(result, &c) |
| 191 | } |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 192 | return result |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 193 | } |
| 194 | |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 195 | // Assigns origin to vertexes in current (c) rev from data in its previous (p) |
| 196 | // revision |
| 197 | func (b *blame) assignOrigin(c, p int) { |
| 198 | // assign origin based on diff info |
| 199 | hunks := diff.Do(b.data[p], b.data[c]) |
| 200 | sl := -1 // source line |
| 201 | dl := -1 // destination line |
| 202 | for h := range hunks { |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 203 | hLines := countLines(hunks[h].Text) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 204 | for hl := 0; hl < hLines; hl++ { |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 205 | switch { |
| 206 | case hunks[h].Type == 0: |
| 207 | sl++ |
| 208 | dl++ |
| 209 | b.graph[c][dl] = b.graph[p][sl] |
| 210 | case hunks[h].Type == 1: |
| 211 | dl++ |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 212 | b.graph[c][dl] = (*Commit)(b.revs[c]) |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 213 | case hunks[h].Type == -1: |
| 214 | sl++ |
| 215 | default: |
| 216 | panic("unreachable") |
| 217 | } |
| 218 | } |
| 219 | } |
| 220 | } |
| 221 | |
| Alberto Cortés | c347e97 | 2015-12-11 16:57:10 | [diff] [blame] | 222 | // GoString prints the results of a Blame using git-blame's style. |
| 223 | func (b *blame) GoString() string { |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 224 | var buf bytes.Buffer |
| 225 | |
| Alberto Cortés | 48bf5bd | 2015-11-27 11:39:34 | [diff] [blame] | 226 | file, err := b.fRev.File(b.path) |
| 227 | if err != nil { |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 228 | panic("PrettyPrint: internal error in repo.Data") |
| 229 | } |
| Joshua Sjoding | 0d999e1 | 2016-02-25 06:40:30 | [diff] [blame] | 230 | contents, err := file.Contents() |
| 231 | if err != nil { |
| 232 | panic("PrettyPrint: internal error in repo.Data") |
| 233 | } |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 234 | |
| 235 | lines := strings.Split(contents, "\n") |
| 236 | // max line number length |
| 237 | mlnl := len(fmt.Sprintf("%s", strconv.Itoa(len(lines)))) |
| 238 | // max author length |
| 239 | mal := b.maxAuthorLength() |
| 240 | format := fmt.Sprintf("%%s (%%-%ds %%%dd) %%s\n", |
| 241 | mal, mlnl) |
| 242 | |
| 243 | fVs := b.graph[len(b.graph)-1] |
| 244 | for ln, v := range fVs { |
| 245 | fmt.Fprintf(&buf, format, v.Hash.String()[:8], |
| 246 | prettyPrintAuthor(fVs[ln]), ln+1, lines[ln]) |
| 247 | } |
| 248 | return buf.String() |
| 249 | } |
| 250 | |
| 251 | // utility function to pretty print the author. |
| Máximo Cuadros | b543e4e | 2015-12-12 02:22:52 | [diff] [blame] | 252 | func prettyPrintAuthor(c *Commit) string { |
| Alberto Cortés | d643cea | 2015-11-16 02:20:01 | [diff] [blame] | 253 | return fmt.Sprintf("%s %s", c.Author.Name, c.Author.When.Format("2006-01-02")) |
| 254 | } |
| 255 | |
| 256 | // utility function to calculate the number of runes needed |
| 257 | // to print the longest author name in the blame of a file. |
| 258 | func (b *blame) maxAuthorLength() int { |
| 259 | memo := make(map[core.Hash]struct{}, len(b.graph)-1) |
| 260 | fVs := b.graph[len(b.graph)-1] |
| 261 | m := 0 |
| 262 | for ln := range fVs { |
| 263 | if _, ok := memo[fVs[ln].Hash]; ok { |
| 264 | continue |
| 265 | } |
| 266 | memo[fVs[ln].Hash] = struct{}{} |
| 267 | m = max(m, utf8.RuneCountInString(prettyPrintAuthor(fVs[ln]))) |
| 268 | } |
| 269 | return m |
| 270 | } |
| 271 | |
| 272 | func max(a, b int) int { |
| 273 | if a > b { |
| 274 | return a |
| 275 | } |
| 276 | return b |
| 277 | } |