1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
package com.puppycrawl.tools.checkstyle.checks.duplicates; |
20 | |
|
21 | |
import com.google.common.collect.ArrayListMultimap; |
22 | |
import com.google.common.collect.Lists; |
23 | |
import com.google.common.collect.MapMaker; |
24 | |
import com.google.common.collect.Multimap; |
25 | |
import com.puppycrawl.tools.checkstyle.api.AbstractFileSetCheck; |
26 | |
import com.puppycrawl.tools.checkstyle.api.FileText; |
27 | |
import com.puppycrawl.tools.checkstyle.api.MessageDispatcher; |
28 | |
import com.puppycrawl.tools.checkstyle.api.Utils; |
29 | |
import java.io.File; |
30 | |
import java.io.IOException; |
31 | |
import java.util.Collection; |
32 | |
import java.util.List; |
33 | |
import java.util.Map; |
34 | |
import org.apache.commons.logging.Log; |
35 | |
import org.apache.commons.logging.LogFactory; |
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | 1035 | public final class StrictDuplicateCodeCheck extends AbstractFileSetCheck |
48 | |
{ |
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
private static final int BIG_PRIME = 317; |
58 | |
|
59 | |
|
60 | |
|
61 | |
|
62 | |
|
63 | |
|
64 | |
private interface ChecksumGenerator |
65 | |
{ |
66 | |
|
67 | |
|
68 | |
|
69 | |
|
70 | |
|
71 | |
|
72 | |
|
73 | |
|
74 | |
|
75 | |
int[] convertLines(String[] aOriginalLines); |
76 | |
} |
77 | |
|
78 | |
|
79 | |
|
80 | |
|
81 | |
|
82 | 12 | private class TextfileChecksumGenerator implements ChecksumGenerator |
83 | |
{ |
84 | |
|
85 | |
public int[] convertLines(String[] aOriginalLines) |
86 | |
{ |
87 | 6 | final int lineCount = aOriginalLines.length; |
88 | 6 | final long[] checkSums = new long[lineCount]; |
89 | 149 | for (int i = 0; i < lineCount; i++) { |
90 | 143 | final String line = aOriginalLines[i]; |
91 | 143 | checkSums[i] = calcChecksum(line); |
92 | |
} |
93 | 6 | final int retLen = Math.max(0, lineCount - mMin + 1); |
94 | 6 | final int[] ret = new int[retLen]; |
95 | |
|
96 | 117 | for (int i = 0; i < retLen; i++) { |
97 | 111 | int blockChecksum = 0; |
98 | 111 | boolean onlyEmptyLines = true; |
99 | 1029 | for (int j = 0; j < mMin; j++) { |
100 | 924 | if (aOriginalLines[i + j].length() > 0) { |
101 | 614 | onlyEmptyLines = false; |
102 | |
} |
103 | 924 | final long checksum = checkSums[i + j]; |
104 | 924 | if (checksum == IGNORE) { |
105 | 6 | blockChecksum = IGNORE; |
106 | 6 | break; |
107 | |
} |
108 | 918 | blockChecksum += (j + 1) * BIG_PRIME * checksum; |
109 | |
} |
110 | 111 | ret[i] = onlyEmptyLines ? IGNORE : blockChecksum; |
111 | |
} |
112 | 6 | return ret; |
113 | |
} |
114 | |
|
115 | |
|
116 | |
|
117 | |
|
118 | |
|
119 | |
|
120 | |
protected int calcChecksum(String aLine) |
121 | |
{ |
122 | 137 | final int hashCode = aLine.hashCode(); |
123 | 137 | if (hashCode == IGNORE) { |
124 | 0 | return Integer.MAX_VALUE / 2; |
125 | |
} |
126 | 137 | return hashCode; |
127 | |
} |
128 | |
} |
129 | |
|
130 | |
|
131 | |
|
132 | |
|
133 | 12 | private class JavaChecksumGenerator extends TextfileChecksumGenerator |
134 | |
{ |
135 | |
|
136 | |
|
137 | |
|
138 | |
|
139 | |
|
140 | |
|
141 | |
@Override |
142 | |
protected int calcChecksum(String aLine) |
143 | |
{ |
144 | |
|
145 | |
|
146 | 143 | if (aLine.startsWith("import ")) { |
147 | 6 | return IGNORE; |
148 | |
} |
149 | 137 | return super.calcChecksum(aLine); |
150 | |
} |
151 | |
} |
152 | |
|
153 | |
|
154 | 1 | private static final Log LOG = |
155 | |
LogFactory.getLog(StrictDuplicateCodeCheck.class); |
156 | |
|
157 | |
|
158 | |
static final int IGNORE = Integer.MIN_VALUE; |
159 | |
|
160 | |
|
161 | |
private static final int DEFAULT_MIN_DUPLICATE_LINES = 12; |
162 | |
|
163 | |
|
164 | 4 | private int mMin = DEFAULT_MIN_DUPLICATE_LINES; |
165 | |
|
166 | |
|
167 | |
private String mBasedir; |
168 | |
|
169 | |
|
170 | |
|
171 | |
|
172 | |
|
173 | |
|
174 | |
private int[][] mLineBlockChecksums; |
175 | |
|
176 | |
|
177 | |
|
178 | |
|
179 | |
|
180 | |
private ChecksumInfo[] mChecksumInfo; |
181 | |
|
182 | |
|
183 | 4 | private final List<File> mFiles = Lists.newArrayList(); |
184 | |
|
185 | |
|
186 | |
|
187 | |
|
188 | 4 | private final Map<String, String[]> mTrimmedLineCache = |
189 | |
new MapMaker().softValues().makeMap(); |
190 | |
|
191 | |
|
192 | |
|
193 | |
|
194 | |
private int mDuplicates; |
195 | |
|
196 | |
private String mCharset; |
197 | |
|
198 | |
|
199 | |
public StrictDuplicateCodeCheck() |
200 | 4 | { |
201 | 4 | } |
202 | |
|
203 | |
|
204 | |
|
205 | |
|
206 | |
|
207 | |
|
208 | |
|
209 | |
|
210 | |
public void setMin(int aMin) |
211 | |
{ |
212 | 2 | if (aMin < 1) { |
213 | 0 | throw new IllegalArgumentException("min must be 1 or higher"); |
214 | |
} |
215 | 2 | mMin = aMin; |
216 | 2 | } |
217 | |
|
218 | |
|
219 | |
public void setBasedir(String aBasedir) |
220 | |
{ |
221 | 4 | mBasedir = aBasedir; |
222 | 4 | } |
223 | |
|
224 | |
@Override |
225 | |
public void beginProcessing(String aCharset) |
226 | |
{ |
227 | 4 | super.beginProcessing(aCharset); |
228 | 4 | mCharset = aCharset; |
229 | 4 | mFiles.clear(); |
230 | 4 | } |
231 | |
|
232 | |
@Override |
233 | |
protected void processFiltered(File aFile, List<String> aLines) |
234 | |
{ |
235 | 6 | mFiles.add(aFile); |
236 | 6 | } |
237 | |
|
238 | |
@Override |
239 | |
public void finishProcessing() |
240 | |
{ |
241 | 4 | super.finishProcessing(); |
242 | 4 | final long start = System.currentTimeMillis(); |
243 | 4 | mDuplicates = 0; |
244 | 4 | mLineBlockChecksums = new int[mFiles.size()][]; |
245 | 4 | mChecksumInfo = new ChecksumInfo[mFiles.size()]; |
246 | |
|
247 | 4 | if (LOG.isDebugEnabled()) { |
248 | 0 | LOG.debug("Reading " + mFiles.size() + " input files"); |
249 | |
} |
250 | |
|
251 | 10 | for (int i = 0; i < mFiles.size(); i++) { |
252 | 6 | final File file = mFiles.get(i); |
253 | |
try { |
254 | 6 | final String[] lines = getTrimmedLines(file); |
255 | 6 | final ChecksumGenerator transformer = |
256 | |
findChecksumGenerator(file); |
257 | 6 | mLineBlockChecksums[i] = transformer.convertLines(lines); |
258 | |
} |
259 | 0 | catch (final IOException ex) { |
260 | 0 | LOG.error("Cannot access " + file + " (" |
261 | |
+ ex.getMessage() + "), ignoring", ex); |
262 | |
|
263 | |
|
264 | |
|
265 | 0 | mLineBlockChecksums = new int[0][0]; |
266 | 6 | } |
267 | |
} |
268 | 4 | fillSortedRelevantChecksums(); |
269 | |
|
270 | 4 | final long endReading = System.currentTimeMillis(); |
271 | 4 | findDuplicates(); |
272 | 4 | final long endSearching = System.currentTimeMillis(); |
273 | |
|
274 | 4 | dumpStats(start, endReading, endSearching); |
275 | |
|
276 | 4 | mLineBlockChecksums = null; |
277 | 4 | mChecksumInfo = null; |
278 | 4 | } |
279 | |
|
280 | |
|
281 | |
|
282 | |
|
283 | |
|
284 | |
|
285 | |
|
286 | |
private ChecksumGenerator findChecksumGenerator(File aFile) |
287 | |
{ |
288 | 6 | if (aFile.getName().endsWith(".java")) { |
289 | 6 | return new JavaChecksumGenerator(); |
290 | |
} |
291 | |
|
292 | 0 | return new TextfileChecksumGenerator(); |
293 | |
} |
294 | |
|
295 | |
|
296 | |
|
297 | |
|
298 | |
|
299 | |
|
300 | |
|
301 | |
private void dumpStats(long aStart, long aEndReading, long aEndSearching) |
302 | |
{ |
303 | 4 | if (LOG.isDebugEnabled()) { |
304 | 0 | final long initTime = aEndReading - aStart; |
305 | 0 | final long workTime = aEndSearching - aEndReading; |
306 | 0 | LOG.debug("files = " + mFiles.size()); |
307 | 0 | LOG.debug("duplicates = " + mDuplicates); |
308 | 0 | LOG.debug("Runtime = " + initTime + " + " + workTime); |
309 | |
} |
310 | 4 | } |
311 | |
|
312 | |
|
313 | |
|
314 | |
|
315 | |
|
316 | |
|
317 | |
|
318 | |
|
319 | |
|
320 | |
private void fillSortedRelevantChecksums() |
321 | |
{ |
322 | 10 | for (int i = 0; i < mLineBlockChecksums.length; i++) { |
323 | 6 | final int[] checksums = mLineBlockChecksums[i]; |
324 | 6 | mChecksumInfo[i] = new ChecksumInfo(checksums); |
325 | |
} |
326 | 4 | } |
327 | |
|
328 | |
|
329 | |
|
330 | |
|
331 | |
|
332 | |
|
333 | |
private void findDuplicates() |
334 | |
{ |
335 | 4 | if (LOG.isDebugEnabled()) { |
336 | 0 | LOG.debug("Analysis phase"); |
337 | |
} |
338 | |
|
339 | |
|
340 | |
|
341 | |
|
342 | |
|
343 | |
|
344 | |
|
345 | 4 | final int len = mFiles.size(); |
346 | 10 | for (int i = 0; i < len; i++) { |
347 | |
|
348 | 6 | final String path = mFiles.get(i).getPath(); |
349 | 6 | getMessageCollector().reset(); |
350 | 6 | final MessageDispatcher dispatcher = getMessageDispatcher(); |
351 | 6 | dispatcher.fireFileStarted(path); |
352 | |
|
353 | 14 | for (int j = 0; j <= i; j++) { |
354 | 8 | findDuplicatesInFiles(i, j); |
355 | |
} |
356 | |
|
357 | 6 | fireErrors(path); |
358 | 6 | dispatcher.fireFileFinished(path); |
359 | |
} |
360 | 4 | } |
361 | |
|
362 | |
|
363 | |
|
364 | |
|
365 | |
|
366 | |
|
367 | |
private void findDuplicatesInFiles(int aI, int aJ) |
368 | |
{ |
369 | 8 | final ChecksumInfo iChecksumInfo = mChecksumInfo[aI]; |
370 | 8 | final ChecksumInfo jChecksumInfo = mChecksumInfo[aJ]; |
371 | 8 | if (!iChecksumInfo.hasChecksumOverlapsWith(jChecksumInfo)) { |
372 | 3 | return; |
373 | |
} |
374 | |
|
375 | 5 | final int[] iLineBlockChecksums = mLineBlockChecksums[aI]; |
376 | 5 | final int iBlockCount = iLineBlockChecksums.length; |
377 | |
|
378 | |
|
379 | |
|
380 | |
|
381 | 5 | final Multimap<Integer, Integer> ignorePairs = |
382 | |
ArrayListMultimap.create(); |
383 | |
|
384 | |
|
385 | |
|
386 | 116 | for (int iLine = 0; iLine < iBlockCount; iLine++) { |
387 | |
|
388 | 111 | final int iSum = iLineBlockChecksums[iLine]; |
389 | 111 | final int[] jLines = jChecksumInfo.findLinesWithChecksum(iSum); |
390 | |
|
391 | 111 | if (jLines.length > 0) { |
392 | 98 | findDuplicateFromLine(aI, aJ, iLine, jLines, ignorePairs); |
393 | |
} |
394 | |
} |
395 | 5 | } |
396 | |
|
397 | |
|
398 | |
|
399 | |
|
400 | |
|
401 | |
|
402 | |
|
403 | |
|
404 | |
|
405 | |
|
406 | |
|
407 | |
|
408 | |
|
409 | |
|
410 | |
|
411 | |
private void findDuplicateFromLine( |
412 | |
final int aI, final int aJ, final int aILine, |
413 | |
final int[] aJLines, final Multimap<Integer, Integer> aIgnore) |
414 | |
{ |
415 | |
|
416 | |
|
417 | |
|
418 | 98 | final int[] iCheckSums = mLineBlockChecksums[aI]; |
419 | 98 | final int[] jCheckSums = mLineBlockChecksums[aJ]; |
420 | 98 | final long checkSum = iCheckSums[aILine]; |
421 | |
|
422 | 220 | for (int jLine : aJLines) { |
423 | |
|
424 | 122 | if (aI == aJ && aILine >= jLine) { |
425 | 110 | continue; |
426 | |
} |
427 | |
|
428 | 12 | if (jCheckSums[jLine] != checkSum) { |
429 | 0 | continue; |
430 | |
} |
431 | |
|
432 | 12 | final Collection<Integer> ignoreEntries = aIgnore.get(aILine); |
433 | 12 | if (ignoreEntries != null && ignoreEntries.contains(jLine)) { |
434 | 6 | continue; |
435 | |
} |
436 | |
|
437 | 6 | final int duplicateLines = |
438 | |
verifiyDuplicateLines(aI, aJ, aILine, jLine); |
439 | 6 | if (duplicateLines >= mMin) { |
440 | 6 | reportDuplicate(duplicateLines, aILine, mFiles.get(aJ), jLine); |
441 | 6 | final int extend = duplicateLines - mMin; |
442 | 12 | for (int i = 0; i < extend; i++) { |
443 | 6 | final int offset = (i + 1); |
444 | 6 | aIgnore.put(aILine + offset, jLine + offset); |
445 | |
} |
446 | |
} |
447 | |
} |
448 | 98 | } |
449 | |
|
450 | |
|
451 | |
|
452 | |
|
453 | |
|
454 | |
|
455 | |
|
456 | |
|
457 | |
|
458 | |
|
459 | |
|
460 | |
|
461 | |
private int verifiyDuplicateLines( |
462 | |
int aI, int aJ, int aIStartLine, int aJStartLine) |
463 | |
{ |
464 | 6 | final File iFile = mFiles.get(aI); |
465 | 6 | final File jFile = mFiles.get(aJ); |
466 | |
try { |
467 | 6 | final String[] iLines = getTrimmedLines(iFile); |
468 | 6 | final String[] jLines = getTrimmedLines(jFile); |
469 | |
|
470 | 6 | int verified = 0; |
471 | 6 | int i = aIStartLine; |
472 | 6 | int j = aJStartLine; |
473 | |
while (i < iLines.length && j < jLines.length |
474 | 39 | && iLines[i++].equals(jLines[j++])) |
475 | |
{ |
476 | 33 | verified += 1; |
477 | |
} |
478 | 6 | return verified; |
479 | |
} |
480 | 0 | catch (IOException ex) { |
481 | 0 | LOG.error("Unable to verify potential duplicate for " |
482 | |
+ iFile + " and " + jFile, ex); |
483 | 0 | return 0; |
484 | |
} |
485 | |
} |
486 | |
|
487 | |
|
488 | |
|
489 | |
|
490 | |
|
491 | |
|
492 | |
|
493 | |
|
494 | |
|
495 | |
|
496 | |
|
497 | |
private String[] getTrimmedLines(File aFile) throws IOException |
498 | |
{ |
499 | 18 | final String path = aFile.getPath(); |
500 | 18 | final String[] cachedLines = mTrimmedLineCache.get(path); |
501 | 18 | if (cachedLines != null) { |
502 | 12 | return cachedLines; |
503 | |
} |
504 | 6 | final String charset = mCharset; |
505 | 6 | final FileText text = new FileText(aFile, charset); |
506 | 6 | final String[] lines = getTrimmed(text.toLinesArray()); |
507 | 6 | mTrimmedLineCache.put(path, lines); |
508 | 6 | return lines; |
509 | |
} |
510 | |
|
511 | |
|
512 | |
|
513 | |
|
514 | |
|
515 | |
|
516 | |
private String[] getTrimmed(String[] aLines) |
517 | |
{ |
518 | 6 | final String[] ret = new String[aLines.length]; |
519 | 149 | for (int i = 0; i < ret.length; i++) { |
520 | 143 | ret[i] = aLines[i].trim(); |
521 | |
} |
522 | 6 | return ret; |
523 | |
} |
524 | |
|
525 | |
|
526 | |
|
527 | |
|
528 | |
|
529 | |
|
530 | |
|
531 | |
|
532 | |
|
533 | |
private void reportDuplicate( |
534 | |
int aEquivalent, int aILine, File aJFile, int aJLine) |
535 | |
{ |
536 | 6 | final String fileName = |
537 | |
Utils.getStrippedFileName(mBasedir, aJFile.getPath()); |
538 | 6 | log(aILine + 1, "duplicates.lines", aEquivalent, fileName, aJLine + 1); |
539 | 6 | mDuplicates += 1; |
540 | 6 | } |
541 | |
|
542 | |
} |