|
| 1 | +/** |
| 2 | + * Copyright 2015 Stijn de Gouw |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + * |
| 16 | + */ |
| 17 | +package org.apache.spark.util.collection; |
| 18 | + |
| 19 | +import java.util.*; |
| 20 | + |
| 21 | +/** |
| 22 | + * This codes generates a int array which fails the standard TimSort. |
| 23 | + * |
| 24 | + * The blog that reported the bug |
| 25 | + * http://www.envisage-project.eu/timsort-specification-and-verification/ |
| 26 | + * |
| 27 | + * This codes was originally wrote by Stijn de Gouw, modified by Evan Yu to adapt to |
| 28 | + * our test suite. |
| 29 | + * |
| 30 | + * https://github.com/abstools/java-timsort-bug |
| 31 | + * https://github.com/abstools/java-timsort-bug/blob/master/LICENSE |
| 32 | + */ |
| 33 | +public class TestTimSort { |
| 34 | + |
| 35 | + private static final int MIN_MERGE = 32; |
| 36 | + |
| 37 | + /** |
| 38 | + * Returns an array of integers that demonstrate the bug in TimSort |
| 39 | + */ |
| 40 | + public static int[] getTimSortBugTestSet(int length) { |
| 41 | + int minRun = minRunLength(length); |
| 42 | + List<Long> runs = runsJDKWorstCase(minRun, length); |
| 43 | + return createArray(runs, length); |
| 44 | + } |
| 45 | + |
| 46 | + private static int minRunLength(int n) { |
| 47 | + int r = 0; // Becomes 1 if any 1 bits are shifted off |
| 48 | + while (n >= MIN_MERGE) { |
| 49 | + r |= (n & 1); |
| 50 | + n >>= 1; |
| 51 | + } |
| 52 | + return n + r; |
| 53 | + } |
| 54 | + |
| 55 | + private static int[] createArray(List<Long> runs, int length) { |
| 56 | + int[] a = new int[length]; |
| 57 | + Arrays.fill(a, 0); |
| 58 | + int endRun = -1; |
| 59 | + for (long len : runs) { |
| 60 | + a[endRun += len] = 1; |
| 61 | + } |
| 62 | + a[length - 1] = 0; |
| 63 | + return a; |
| 64 | + } |
| 65 | + |
| 66 | + /** |
| 67 | + * Fills <code>runs</code> with a sequence of run lengths of the form<br> |
| 68 | + * Y_n x_{n,1} x_{n,2} ... x_{n,l_n} <br> |
| 69 | + * Y_{n-1} x_{n-1,1} x_{n-1,2} ... x_{n-1,l_{n-1}} <br> |
| 70 | + * ... <br> |
| 71 | + * Y_1 x_{1,1} x_{1,2} ... x_{1,l_1}<br> |
| 72 | + * The Y_i's are chosen to satisfy the invariant throughout execution, |
| 73 | + * but the x_{i,j}'s are merged (by <code>TimSort.mergeCollapse</code>) |
| 74 | + * into an X_i that violates the invariant. |
| 75 | + * |
| 76 | + * @param length The sum of all run lengths that will be added to <code>runs</code>. |
| 77 | + */ |
| 78 | + private static List<Long> runsJDKWorstCase(int minRun, int length) { |
| 79 | + List<Long> runs = new ArrayList<Long>(); |
| 80 | + |
| 81 | + long runningTotal = 0, Y = minRun + 4, X = minRun; |
| 82 | + |
| 83 | + while (runningTotal + Y + X <= length) { |
| 84 | + runningTotal += X + Y; |
| 85 | + generateJDKWrongElem(runs, minRun, X); |
| 86 | + runs.add(0, Y); |
| 87 | + // X_{i+1} = Y_i + x_{i,1} + 1, since runs.get(1) = x_{i,1} |
| 88 | + X = Y + runs.get(1) + 1; |
| 89 | + // Y_{i+1} = X_{i+1} + Y_i + 1 |
| 90 | + Y += X + 1; |
| 91 | + } |
| 92 | + |
| 93 | + if (runningTotal + X <= length) { |
| 94 | + runningTotal += X; |
| 95 | + generateJDKWrongElem(runs, minRun, X); |
| 96 | + } |
| 97 | + |
| 98 | + runs.add(length - runningTotal); |
| 99 | + return runs; |
| 100 | + } |
| 101 | + |
| 102 | + /** |
| 103 | + * Adds a sequence x_1, ..., x_n of run lengths to <code>runs</code> such that:<br> |
| 104 | + * 1. X = x_1 + ... + x_n <br> |
| 105 | + * 2. x_j >= minRun for all j <br> |
| 106 | + * 3. x_1 + ... + x_{j-2} < x_j < x_1 + ... + x_{j-1} for all j <br> |
| 107 | + * These conditions guarantee that TimSort merges all x_j's one by one |
| 108 | + * (resulting in X) using only merges on the second-to-last element. |
| 109 | + * |
| 110 | + * @param X The sum of the sequence that should be added to runs. |
| 111 | + */ |
| 112 | + private static void generateJDKWrongElem(List<Long> runs, int minRun, long X) { |
| 113 | + for (long newTotal; X >= 2 * minRun + 1; X = newTotal) { |
| 114 | + //Default strategy |
| 115 | + newTotal = X / 2 + 1; |
| 116 | + //Specialized strategies |
| 117 | + if (3 * minRun + 3 <= X && X <= 4 * minRun + 1) { |
| 118 | + // add x_1=MIN+1, x_2=MIN, x_3=X-newTotal to runs |
| 119 | + newTotal = 2 * minRun + 1; |
| 120 | + } else if (5 * minRun + 5 <= X && X <= 6 * minRun + 5) { |
| 121 | + // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=X-newTotal to runs |
| 122 | + newTotal = 3 * minRun + 3; |
| 123 | + } else if (8 * minRun + 9 <= X && X <= 10 * minRun + 9) { |
| 124 | + // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=X-newTotal to runs |
| 125 | + newTotal = 5 * minRun + 5; |
| 126 | + } else if (13 * minRun + 15 <= X && X <= 16 * minRun + 17) { |
| 127 | + // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=3MIN+4, x_6=X-newTotal to runs |
| 128 | + newTotal = 8 * minRun + 9; |
| 129 | + } |
| 130 | + runs.add(0, X - newTotal); |
| 131 | + } |
| 132 | + runs.add(0, X); |
| 133 | + } |
| 134 | +} |
0 commit comments