From c460632decd622eddcb272af2e0a0011ba4d3b04 Mon Sep 17 00:00:00 2001 From: Matt Soucy Date: Fri, 3 Jul 2020 10:55:03 -0400 Subject: [PATCH] Refactor --- build.gradle | 7 +- src/main/kotlin/me/msoucy/gbat/Analyze.kt | 56 +++++-- src/main/kotlin/me/msoucy/gbat/Main.kt | 15 +- .../kotlin/me/msoucy/gbat/ParseHistory.kt | 139 ++++++++++++++++++ src/main/kotlin/me/msoucy/gbat/Repo.kt | 4 +- .../me/msoucy/gbat/models/KnowledgeModel.kt | 2 +- .../kotlin/me/msoucy/gbat/models/LineModel.kt | 2 +- .../kotlin/me/msoucy/gbat/models/Models.kt | 38 ++++- .../me/msoucy/gbat/models/SummaryModel.kt | 16 +- 9 files changed, 236 insertions(+), 43 deletions(-) create mode 100644 src/main/kotlin/me/msoucy/gbat/ParseHistory.kt diff --git a/build.gradle b/build.gradle index b26e55f..df4b93f 100644 --- a/build.gradle +++ b/build.gradle @@ -19,8 +19,9 @@ repositories { dependencies { implementation 'org.jetbrains.kotlin:kotlin-stdlib' implementation "com.xenomachina:kotlin-argparser:$kotlin_argparser_version" - implementation "org.jetbrains.exposed:exposed-core:0.24.1" - implementation "org.jetbrains.exposed:exposed-dao:0.24.1" - implementation "org.jetbrains.exposed:exposed-jdbc:0.24.1" + implementation "org.jetbrains.exposed:exposed-core:0.25.1" + implementation "org.jetbrains.exposed:exposed-dao:0.25.1" + implementation "org.jetbrains.exposed:exposed-jdbc:0.25.1" + compile("org.xerial:sqlite-jdbc:3.30.1") testImplementation 'junit:junit:4.12' } diff --git a/src/main/kotlin/me/msoucy/gbat/Analyze.kt b/src/main/kotlin/me/msoucy/gbat/Analyze.kt index 2e9690d..c26f62f 100644 --- a/src/main/kotlin/me/msoucy/gbat/Analyze.kt +++ b/src/main/kotlin/me/msoucy/gbat/Analyze.kt @@ -1,27 +1,51 @@ package me.msoucy.gbat import java.io.File +import org.jetbrains.exposed.sql.* +import org.jetbrains.exposed.sql.transactions.transaction +import me.msoucy.gbat.models.CondensedAnalysis +import me.msoucy.gbat.models.Condensation import me.msoucy.gbat.models.KnowledgeModel import me.msoucy.gbat.models.LineModel import me.msoucy.gbat.models.RiskModel -private data class Condensation(val authors : List, val knowledge : Double, val orphaned : Double, val atRisk : Double = 0.0) : Comparable { - override operator fun compareTo(other : Condensation) : Int { - return -1 - } -} -private class Result(val repoRoot : File, - val projectRoot : File, - val fname : File, - val results : List>>) +fun analyze( + repoRoot : String, + projectRoot : String, + fname : String, + riskModel : RiskModel, + createdConstant : Double, + historyItem : HistoryItem, + verbose : Boolean = false +) : CondensedAnalysis { + val lineModel = LineModel() + val db = Database.connect("jdbc:sqlite:memory:", "org.sqlite.JDBC") + val knowledgeModel = KnowledgeModel(db, createdConstant, riskModel) + var changesProcessed = 0 -private fun condenseAnalysis(repoRoot : File, - projectRoot : File, - fname : File, - lineModel : LineModel, - knowledgeModel : KnowledgeModel, - riskModel : RiskModel) : Result { + historyItem.authorDiffs.forEach { (author, changes) -> + changes.forEach { change -> + changesProcessed++ + if(changesProcessed % 1000 == 0 && verbose) { + System.err.println("Analyzer applied change #${changesProcessed}") + } + lineModel.apply(change.eventType, change.lineNum, change.lineVal ?: "") + knowledgeModel.apply(change.eventType, author, change.lineNum) + } + } + + return condenseAnalysis(repoRoot, projectRoot, fname, lineModel, knowledgeModel, riskModel) +} + +private fun condenseAnalysis( + repoRoot : String, + projectRoot : String, + fname : String, + lineModel : LineModel, + knowledgeModel : KnowledgeModel, + riskModel : RiskModel +) : CondensedAnalysis { val condensations = lineModel.get().mapIndexed { idx, line -> val knowledges = knowledgeModel.knowledgeSummary(idx + 1).map { (authors, knowledge) -> Condensation(authors, @@ -31,5 +55,5 @@ private fun condenseAnalysis(repoRoot : File, }.sorted() Pair(line, knowledges) } - return Result(repoRoot, projectRoot, fname, condensations) + return CondensedAnalysis(repoRoot, projectRoot, fname, condensations.mutableCopyOf()) } \ No newline at end of file diff --git a/src/main/kotlin/me/msoucy/gbat/Main.kt b/src/main/kotlin/me/msoucy/gbat/Main.kt index 761c7be..94ce2d0 100644 --- a/src/main/kotlin/me/msoucy/gbat/Main.kt +++ b/src/main/kotlin/me/msoucy/gbat/Main.kt @@ -93,11 +93,11 @@ fun main(args: Array) = mainBody { } } - val risk_thresh = risk_threshold ?: default_bus_risk.pow(3) + val riskThresh = risk_threshold ?: default_bus_risk.pow(3) val interesting_res = parse_interesting(if (interesting.isEmpty()) DEFAULT_INTERESTING_RES else interesting) val not_interesting_res = if (not_interesting.isEmpty()) listOf() else parse_interesting(not_interesting) - val project_root_file = File(project_root).also { + val projectRootFile = File(project_root).also { if(!it.isDirectory) throw InvalidArgumentException("Provided project root does not exist") } @@ -125,7 +125,14 @@ fun main(args: Array) = mainBody { } val pool = Executors.newFixedThreadPool(num_analyzer_procs + num_git_procs + 1) + + fnames.forEach { fname -> + pool.submit { + parseHistory(repo, projectRootFile, File(fname)) + } + } + val summ_result = mutableListOf() - - } + val dbFname = File(outDir, "summary.db") + } } diff --git a/src/main/kotlin/me/msoucy/gbat/ParseHistory.kt b/src/main/kotlin/me/msoucy/gbat/ParseHistory.kt new file mode 100644 index 0000000..d32ccf6 --- /dev/null +++ b/src/main/kotlin/me/msoucy/gbat/ParseHistory.kt @@ -0,0 +1,139 @@ +package me.msoucy.gbat + +import java.io.File +import kotlin.math.abs +import kotlin.math.max + +import me.msoucy.gbat.models.ChangeType +import me.msoucy.gbat.models.Event + +data class HistoryItem( + val repoRoot : File, + val projectRoot : File, + val fname : File, + val authorDiffs : List>> +) + +fun parseHistory(repo : GitRepo, + projectRoot : File, + fname : File, + verbose : Boolean = false) : HistoryItem { + val entries = repo.log(fname) + val repoRoot = repo.root() + if(verbose) { + System.err.println("Parsing history for ${fname}") + } + return HistoryItem(repoRoot, projectRoot, fname, + entries.map { (author, diff) -> + Pair(author.trim(), diffWalk(diff)) + } + ) +} + +fun diffWalk(diff : Diff) : List { + + fun String.startsChunk() = startsWith("@@") + fun String.isOldLine() = startsWith("-") + fun String.isNewLine() = startsWith("+") + + fun chunkify() : List> { + val chunks = mutableListOf>() + var curChunk = mutableListOf() + diff.split("\n").forEach { line -> + if(line.startsChunk()) { + if(curChunk.isNotEmpty()) { + chunks.add(curChunk) + curChunk = mutableListOf() + } + curChunk.add(line) + } else if(curChunk.isNotEmpty()) { + curChunk.add(line) + } + } + if(curChunk.isNotEmpty()) { + chunks.add(curChunk) + } + return chunks + } + + val chunks = chunkify() + val events = mutableListOf() + + class Hunk( + val lineNum : Int, + val oldLines : List, + val newLines : List + ) + + fun hunkize(chunkWoHeader : List, firstLineNum : Int) : List { + var curOld = mutableListOf() + var curNew = mutableListOf() + var curLine = firstLineNum + var hunks = mutableListOf() + + chunkWoHeader.forEach { line -> + if(line.isOldLine()) { + curOld.add(line) + } else if(line.isNewLine()) { + curNew.add(line) + } else if(curOld.isNotEmpty() || curNew.isNotEmpty()) { + hunks.add(Hunk(curLine, curOld, curNew)) + curLine += curNew.size + 1 + curOld = mutableListOf() + curNew = mutableListOf() + } else { + curLine++ + } + } + if(curOld.isNotEmpty() || curNew.isNotEmpty()) { + hunks.add(Hunk(curLine, curOld, curNew)) + } + + return hunks + } + + fun stepHunk(hunk : Hunk) { + val oldLen = hunk.oldLines.size + val newLen = hunk.newLines.size + val maxLen = max(oldLen, newLen) + var lineNum = hunk.lineNum + + for (i in 0..maxLen) { + if(i < oldLen && i < newLen) { + events += Event( + ChangeType.Change, + lineNum, + hunk.newLines[i].substring(1) + ) + } + } + } + + fun stepChunk(chunk : List) { + val header = chunk[0] + + // format of header is + // + // @@ -old_line_num,cnt_lines_in_old_chunk, +new_line_num,cnt_lines_in_new_chunk + // + val (_, lineInfo, _) = header.split("@@") + val offsets = lineInfo.trim().split(" ") + + // we only care about the new offset, since in the first chunk + // of the file the new and old are the same, and since we add + // and subtract lines as we go, we should stay in step with the + // new offsets. + val newOffset = offsets[1].split(",").map{ + abs(it.toInt()) + }.first() + + // a hunk is a group of contiguous - + lines + val hunks = hunkize(chunk.subList(1, chunk.size), newOffset) + + hunks.forEach(::stepHunk) + } + + chunks.forEach(::stepChunk) + + return events +} \ No newline at end of file diff --git a/src/main/kotlin/me/msoucy/gbat/Repo.kt b/src/main/kotlin/me/msoucy/gbat/Repo.kt index 24c1a97..7d6c691 100644 --- a/src/main/kotlin/me/msoucy/gbat/Repo.kt +++ b/src/main/kotlin/me/msoucy/gbat/Repo.kt @@ -20,14 +20,14 @@ class GitRepo(val projectRoot : File, val git_exe : String) { return out ?: "" } - fun root() : String? { + fun root() : File { val cmd = listOf( git_exe, "rev-parse", "--show-toplevel" ) val (out, _) = cmd.runCommand(projectRoot) - return out + return File(out) } fun log(fname : File) : List> { diff --git a/src/main/kotlin/me/msoucy/gbat/models/KnowledgeModel.kt b/src/main/kotlin/me/msoucy/gbat/models/KnowledgeModel.kt index 2b8ec32..f3d34f2 100644 --- a/src/main/kotlin/me/msoucy/gbat/models/KnowledgeModel.kt +++ b/src/main/kotlin/me/msoucy/gbat/models/KnowledgeModel.kt @@ -45,7 +45,7 @@ class KnowledgeModel(val db : Database, val constant : Double, val riskModel : R val SAFE_KNOWLEDGE_ACCT_ID = 1 val KNOWLEDGE_PER_LINE_ADDED = 1000.0 - fun applyChange(changeType : ChangeType, author : String, lineNum : Int) = when(changeType) { + fun apply(changeType : ChangeType, author : String, lineNum : Int) = when(changeType) { ChangeType.Add -> lineAdded(author, lineNum) ChangeType.Change -> lineChanged(author, lineNum) ChangeType.Remove -> lineRemoved(lineNum) diff --git a/src/main/kotlin/me/msoucy/gbat/models/LineModel.kt b/src/main/kotlin/me/msoucy/gbat/models/LineModel.kt index 4d64c60..fcfeeb7 100644 --- a/src/main/kotlin/me/msoucy/gbat/models/LineModel.kt +++ b/src/main/kotlin/me/msoucy/gbat/models/LineModel.kt @@ -12,7 +12,7 @@ class LineModel() { inner class Line(var num : Int, var text : String) val model = mutableSetOf() - fun applyChange(changeType : ChangeType, lineNum : Int, lineText : String) = when(changeType) { + fun apply(changeType : ChangeType, lineNum : Int, lineText : String) = when(changeType) { ChangeType.Add -> add(Line(lineNum, lineText)) ChangeType.Change -> change(Line(lineNum, lineText)) ChangeType.Remove -> del(Line(lineNum, lineText)) diff --git a/src/main/kotlin/me/msoucy/gbat/models/Models.kt b/src/main/kotlin/me/msoucy/gbat/models/Models.kt index ffbb6e3..7fdee05 100644 --- a/src/main/kotlin/me/msoucy/gbat/models/Models.kt +++ b/src/main/kotlin/me/msoucy/gbat/models/Models.kt @@ -2,4 +2,40 @@ package me.msoucy.gbat.models enum class ChangeType { Add, Change, Remove -} \ No newline at end of file +} + +data class Event( + val eventType : ChangeType, + val lineNum : Int, + val lineVal : String? +) + +data class Condensation( + val authors : List, + val knowledge : Double, + val orphaned : Double, + val risk : Double = 0.0 +) : Comparable { + override operator fun compareTo(other : Condensation) : Int { + var result = authors.size.compareTo(other.authors.size) + if(result == 0) { + authors.zip(other.authors).forEach { (a, b) -> + if(result == 0) result = a.compareTo(b) + } + } + if(result == 0) + result = knowledge.compareTo(other.knowledge) + if(result == 0) + result = orphaned.compareTo(other.orphaned) + if(result == 0) + result = risk.compareTo(other.risk) + return result + } +} + +class CondensedAnalysis( + var repoRoot : String = "", + var projectRoot : String = "", + var fileName : String = "", + var lineSummaries : MutableList>> = mutableListOf() +) \ No newline at end of file diff --git a/src/main/kotlin/me/msoucy/gbat/models/SummaryModel.kt b/src/main/kotlin/me/msoucy/gbat/models/SummaryModel.kt index e25bd0c..878d10f 100644 --- a/src/main/kotlin/me/msoucy/gbat/models/SummaryModel.kt +++ b/src/main/kotlin/me/msoucy/gbat/models/SummaryModel.kt @@ -7,20 +7,6 @@ import org.jetbrains.exposed.dao.id.IntIdTable import org.jetbrains.exposed.sql.* import org.jetbrains.exposed.sql.transactions.transaction -class CondensedAnalysis { - class LineSummary { - var authors = listOf() - var knowledge = 0.0 - var risk = 0.0 - var orphaned = 0.0 - } - var repoRoot = "" - var project = "" - var projectRoot = "" - var fileName = "" - var lineSummaries = mutableListOf>>() -} - class SummaryModel(val db : Database) { object ProjectTable : IntIdTable("projects", "projectid") { @@ -112,7 +98,7 @@ class SummaryModel(val db : Database) { fun summarize(ca : CondensedAnalysis) { val fname = adjustFname(File(ca.repoRoot), File(ca.projectRoot), File(ca.fileName)) - val projectId = findOrCreateProject(ca.project) + val projectId = findOrCreateProject(ca.projectRoot) var parentDirId = 0 splitAllDirs(fname.parentFile).forEach {