import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
+import javax.annotation.Nullable;

+import scala.None$;
+import scala.Option;
import scala.Product2;
import scala.Tuple2;
import scala.collection.Iterator;

+import com.google.common.annotations.VisibleForTesting;
import com.google.common.io.Closeables;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.spark.Partitioner;
+import org.apache.spark.ShuffleDependency;
import org.apache.spark.SparkConf;
import org.apache.spark.TaskContext;
import org.apache.spark.executor.ShuffleWriteMetrics;
+import org.apache.spark.scheduler.MapStatus;
+import org.apache.spark.scheduler.MapStatus$;
import org.apache.spark.serializer.Serializer;
import org.apache.spark.serializer.SerializerInstance;
+import org.apache.spark.shuffle.IndexShuffleBlockResolver;
+import org.apache.spark.shuffle.ShuffleWriter;
import org.apache.spark.storage.*;
import org.apache.spark.util.Utils;

 * <p>
 * There have been proposals to completely remove this code path; see SPARK-6026 for details.
 */
-final class BypassMergeSortShuffleWriter<K, V> implements SortShuffleFileWriter<K, V> {
+final class BypassMergeSortShuffleWriter<K, V> extends ShuffleWriter<K, V> {

  private final Logger logger = LoggerFactory.getLogger(BypassMergeSortShuffleWriter.class);

@@ -72,31 +81,52 @@ final class BypassMergeSortShuffleWriter<K, V> implements SortShuffleFileWriter<
  private final BlockManager blockManager;
  private final Partitioner partitioner;
  private final ShuffleWriteMetrics writeMetrics;
+  private final int shuffleId;
+  private final int mapId;
  private final Serializer serializer;
+  private final IndexShuffleBlockResolver shuffleBlockResolver;

  /** Array of file writers, one for each partition */
  private DiskBlockObjectWriter[] partitionWriters;
+  @Nullable private MapStatus mapStatus;
+  private long[] partitionLengths;
+
+  /**
+   * Are we in the process of stopping? Because map tasks can call stop() with success = true
+   * and then call stop() with success = false if they get an exception, we want to make sure
+   * we don't try deleting files, etc twice.
+   */
+  private boolean stopping = false;

  public BypassMergeSortShuffleWriter(
-      SparkConf conf,
      BlockManager blockManager,
-      Partitioner partitioner,
-      ShuffleWriteMetrics writeMetrics,
-      Serializer serializer) {
+      IndexShuffleBlockResolver shuffleBlockResolver,
+      BypassMergeSortShuffleHandle<K, V> handle,
+      int mapId,
+      TaskContext taskContext,
+      SparkConf conf) {
    // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided
    this.fileBufferSize = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024;
    this.transferToEnabled = conf.getBoolean("spark.file.transferTo", true);
-    this.numPartitions = partitioner.numPartitions();
    this.blockManager = blockManager;
-    this.partitioner = partitioner;
-    this.writeMetrics = writeMetrics;
-    this.serializer = serializer;
+    final ShuffleDependency<K, V, V> dep = handle.dependency();
+    this.mapId = mapId;
+    this.shuffleId = dep.shuffleId();
+    this.partitioner = dep.partitioner();
+    this.numPartitions = partitioner.numPartitions();
+    this.writeMetrics = new ShuffleWriteMetrics();
+    taskContext.taskMetrics().shuffleWriteMetrics_$eq(Option.apply(writeMetrics));
+    this.serializer = Serializer.getSerializer(dep.serializer());
+    this.shuffleBlockResolver = shuffleBlockResolver;
  }

  @Override
-  public void insertAll(Iterator<Product2<K, V>> records) throws IOException {
+  public void write(Iterator<Product2<K, V>> records) throws IOException {
    assert (partitionWriters == null);
    if (!records.hasNext()) {
+      partitionLengths = new long[numPartitions];
+      shuffleBlockResolver.writeIndexFile(shuffleId, mapId, partitionLengths);
+      mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
      return;
    }
    final SerializerInstance serInstance = serializer.newInstance();
@@ -124,13 +154,24 @@ public void insertAll(Iterator<Product2<K, V>> records) throws IOException {
    for (DiskBlockObjectWriter writer : partitionWriters) {
      writer.commitAndClose();
    }
+
+    partitionLengths =
+      writePartitionedFile(shuffleBlockResolver.getDataFile(shuffleId, mapId));
+    shuffleBlockResolver.writeIndexFile(shuffleId, mapId, partitionLengths);
+    mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
  }

-  @Override
-  public long[] writePartitionedFile(
-      BlockId blockId,
-      TaskContext context,
-      File outputFile) throws IOException {
+  @VisibleForTesting
+  long[] getPartitionLengths() {
+    return partitionLengths;
+  }
+
+  /**
+   * Concatenate all of the per-partition files into a single combined file.
+   *
+   * @return array of lengths, in bytes, of each partition of the file (used by map output tracker).
+   */
+  private long[] writePartitionedFile(File outputFile) throws IOException {
    // Track location of the partition starts in the output file
    final long[] lengths = new long[numPartitions];
    if (partitionWriters == null) {
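The body of writePartitionedFile() is mostly collapsed in this hunk. As a rough, hypothetical sketch of the concatenation its javadoc describes (not the code from this patch), the work amounts to appending each per-partition temp file to the single combined output file while recording how many bytes each partition contributed; the `partitionFiles` parameter and the helper name below are illustrative only.

  // Hypothetical sketch only: concatenate per-partition temp files into outputFile
  // and return the per-partition byte counts (which feed the index file / MapStatus).
  private static long[] concatenationSketch(File[] partitionFiles, File outputFile) throws IOException {
    final long[] lengths = new long[partitionFiles.length];
    try (FileOutputStream out = new FileOutputStream(outputFile, true)) {
      for (int i = 0; i < partitionFiles.length; i++) {
        try (FileInputStream in = new FileInputStream(partitionFiles[i])) {
          final byte[] buffer = new byte[8192];
          int read;
          while ((read = in.read(buffer)) != -1) {
            out.write(buffer, 0, read);
            lengths[i] += read;  // bytes written for partition i
          }
        }
      }
    }
    return lengths;
  }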
@@ -165,18 +206,33 @@ public long[] writePartitionedFile(
  }

  @Override
-  public void stop() throws IOException {
-    if (partitionWriters != null) {
-      try {
-        for (DiskBlockObjectWriter writer : partitionWriters) {
-          // This method explicitly does _not_ throw exceptions:
-          File file = writer.revertPartialWritesAndClose();
-          if (!file.delete()) {
-            logger.error("Error while deleting file {}", file.getAbsolutePath());
+  public Option<MapStatus> stop(boolean success) {
+    if (stopping) {
+      return None$.empty();
+    } else {
+      stopping = true;
+      if (success) {
+        if (mapStatus == null) {
+          throw new IllegalStateException("Cannot call stop(true) without having called write()");
+        }
+        return Option.apply(mapStatus);
+      } else {
+        // The map task failed, so delete our output data.
+        if (partitionWriters != null) {
+          try {
+            for (DiskBlockObjectWriter writer : partitionWriters) {
+              // This method explicitly does _not_ throw exceptions:
+              File file = writer.revertPartialWritesAndClose();
+              if (!file.delete()) {
+                logger.error("Error while deleting file {}", file.getAbsolutePath());
+              }
+            }
+          } finally {
+            partitionWriters = null;
          }
        }
-      } finally {
-        partitionWriters = null;
+        shuffleBlockResolver.removeDataByMap(shuffleId, mapId);
+        return None$.empty();
      }
    }
  }
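For context on how the new ShuffleWriter contract shown in this diff is driven, below is a minimal, hypothetical caller sketch (in Spark the real caller is the shuffle map task, which is not part of this diff): write() produces the per-map data and index files and records a MapStatus, stop(true) hands that status back, and stop(false) after a failure deletes any partial output; the `stopping` flag above makes a second stop() call a no-op.

  // Hypothetical helper, for illustration only; names and structure are assumptions.
  import org.apache.spark.scheduler.MapStatus;
  import org.apache.spark.shuffle.ShuffleWriter;
  import scala.Product2;
  import scala.collection.Iterator;

  final class ShuffleWriterLifecycleSketch {
    static <K, V> MapStatus runMapTask(ShuffleWriter<K, V> writer,
                                       Iterator<Product2<K, V>> records) throws Exception {
      try {
        writer.write(records);            // writes data + index files, stashes the MapStatus
        return writer.stop(true).get();   // success: return the MapStatus for the map output tracker
      } catch (Exception e) {
        writer.stop(false);               // failure: revert and delete partial output
        throw e;
      }
    }
  }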