-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathVariationDiffMiner.java
More file actions
170 lines (147 loc) · 7.89 KB
/
VariationDiffMiner.java
File metadata and controls
170 lines (147 loc) · 7.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
package org.variantsync.diffdetective.mining;
import org.apache.commons.io.FileUtils;
import org.tinylog.Logger;
import org.variantsync.diffdetective.analysis.*;
import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy;
import org.variantsync.diffdetective.analysis.strategies.AnalyzeAllThenExport;
import org.variantsync.diffdetective.datasets.*;
import org.variantsync.diffdetective.datasets.predefined.StanciulescuMarlin;
import org.variantsync.diffdetective.examplesearch.ExampleCriterions;
import org.variantsync.diffdetective.metadata.ExplainedFilterSummary;
import org.variantsync.diffdetective.mining.formats.DirectedEdgeLabelFormat;
import org.variantsync.diffdetective.mining.formats.MiningNodeFormat;
import org.variantsync.diffdetective.mining.formats.ReleaseMiningDiffNodeFormat;
import org.variantsync.diffdetective.variation.DiffLinesLabel;
import org.variantsync.diffdetective.variation.diff.VariationDiff;
import org.variantsync.diffdetective.variation.diff.filter.VariationDiffFilter;
import org.variantsync.diffdetective.variation.diff.serialize.GraphFormat;
import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions;
import org.variantsync.diffdetective.variation.diff.serialize.edgeformat.EdgeLabelFormat;
import org.variantsync.diffdetective.variation.diff.serialize.treeformat.CommitDiffVariationDiffLabelFormat;
import org.variantsync.diffdetective.variation.diff.transform.CollapseNestedNonEditedAnnotations;
import org.variantsync.diffdetective.variation.diff.transform.CutNonEditedSubtrees;
import org.variantsync.diffdetective.variation.diff.transform.Transformer;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiFunction;
import java.util.function.Consumer;
public class VariationDiffMiner {
public static final Path DATASET_FILE = DefaultDatasets.EMACS;
public static final boolean SEARCH_FOR_GOOD_RUNNING_EXAMPLES = false;
public static final boolean UPDATE_REPOS_BEFORE_MINING = false;
// public static final boolean PRINT_LATEX_TABLE = true;
// public static final int PRINT_LARGEST_SUBJECTS = 3;
public static final boolean DEBUG_TEST = false;
public static List<Transformer<VariationDiff<DiffLinesLabel>>> Postprocessing(final Repository repository) {
final List<Transformer<VariationDiff<DiffLinesLabel>>> processing = new ArrayList<>();
processing.add(new CutNonEditedSubtrees<>());
processing.add(new CollapseNestedNonEditedAnnotations());
return processing;
}
public static MiningNodeFormat NodeFormat() {
return
// new DebugMiningDiffNodeFormat();
new ReleaseMiningDiffNodeFormat();
}
public static EdgeLabelFormat<DiffLinesLabel> EdgeFormat() {
return EdgeFormat(NodeFormat());
}
private static EdgeLabelFormat<DiffLinesLabel> EdgeFormat(final MiningNodeFormat nodeFormat) {
final EdgeLabelFormat.Direction direction = EdgeLabelFormat.Direction.ParentToChild;
return
// new DefaultEdgeLabelFormat(direction);
new DirectedEdgeLabelFormat(nodeFormat, false, direction);
}
public static LineGraphExportOptions<DiffLinesLabel> MiningExportOptions(final Repository repository) {
final MiningNodeFormat nodeFormat = NodeFormat();
return new LineGraphExportOptions<>(
GraphFormat.VARIATION_DIFF
// We have to ensure that all VariationDiffs have unique IDs, so use name of changed file and commit hash.
, new CommitDiffVariationDiffLabelFormat()
, nodeFormat
, EdgeFormat(nodeFormat)
, LineGraphExportOptions.LogError()
.andThen(LineGraphExportOptions.RenderError())
.andThen(LineGraphExportOptions.SysExitOnError())
);
}
public static AnalysisStrategy MiningStrategy() {
return new AnalyzeAllThenExport();
// new CompositeVariationDiffMiningStrategy(
// new MineAndExportIncrementally(1000),
// new MiningMonitor(10)
// );
}
public static BiFunction<Repository, Path, Analysis> AnalysisFactory =
(repo, repoOutputDir) -> new Analysis(
"VariationDiffMiner",
List.of(
new PreprocessingAnalysis(Postprocessing(repo)),
new FilterAnalysis(
VariationDiffFilter.notEmpty(),
VariationDiffFilter.moreThanOneArtifactNode(),
/// We want to exclude patches that do not edit variability.
/// In particular, we noticed that most edits just insert or delete artifacts (or replace it).
/// This is reasonable and was also observed in previous studies: Edits to artifacts are more frequent than edits to variability.
/// Yet, such edits cannot reveal compositions of more complex edits to variability.
/// We thus filter them.
VariationDiffFilter.hasAtLeastOneEditToVariability()
),
new LineGraphExportAnalysis(MiningStrategy(), MiningExportOptions(repo)),
new EditClassOccurenceAnalysis(MiningStrategy()),
new StatisticsAnalysis()
),
repo,
repoOutputDir
);
public static void main(String[] args) throws IOException {
// setupLogger(Level.INFO);
// setupLogger(Level.DEBUG);
final PatchDiffParseOptions.DiffStoragePolicy diffStoragePolicy = PatchDiffParseOptions.DiffStoragePolicy.DO_NOT_REMEMBER;
final Path inputDir = Paths.get("..", "DiffDetectiveMining");
final Path outputDir = Paths.get("results", "variationdiffs");
final List<Repository> repos;
if (DEBUG_TEST) {
final Path variantEvolutionDatasetsDir = Paths.get("..", "variantevolution_datasets");
repos = List.of(
// Godot.cloneFromGithubTo(inputDir),
StanciulescuMarlin.fromZipInDiffDetectiveAt(Path.of("."))
// Vim.cloneFromGithubTo(inputDir),
// LinuxKernel.cloneFromGithubTo(variantEvolutionDatasetsDir)
);
} else {
final List<DatasetDescription> datasets = DefaultDatasets.loadDatasets(DATASET_FILE);
// if (PRINT_LATEX_TABLE) {
// EditClassValidation.printLaTeXTableFor(datasets);
// }
final DatasetFactory miningDatasetFactory = new DatasetFactory(inputDir);
repos = miningDatasetFactory.createAll(datasets, true, UPDATE_REPOS_BEFORE_MINING);
}
Logger.info("Mining the following repositories:");
for (final Repository repo : repos) {
repo.setParseOptions(repo.getParseOptions().withDiffStoragePolicy(diffStoragePolicy));
Logger.info(" - {} from {}", repo.getRepositoryName(), repo.getRemoteURI());
}
/* ************************ *\
| END OF ARGUMENTS |
\* ************************ */
final Consumer<Path> repoPostProcessing;
if (SEARCH_FOR_GOOD_RUNNING_EXAMPLES) {
repoPostProcessing = repoOutputDir -> {
new ExplainedFilterSummary(ExampleCriterions.DefaultExampleConditions()).exportTo(repoOutputDir.resolve("runningExampleFilterReasons.txt"));
};
} else {
repoPostProcessing = p -> {};
}
Analysis.forEachRepository(repos, outputDir, (repo, repoOutputDir) -> {
Analysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir));
repoPostProcessing.accept(repoOutputDir);
});
Logger.info("Done");
final String logFile = "log.txt";
FileUtils.copyFile(Path.of(logFile).toFile(), outputDir.resolve(logFile).toFile());
}
}