/*
 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
 *
 * Project and contact information: https://cascading.wensel.net/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.flow.tez.stream.element;

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import cascading.CascadingException;
import cascading.flow.FlowProcess;
import cascading.flow.SliceCounters;
import cascading.flow.planner.Scope;
import cascading.flow.stream.duct.Duct;
import cascading.flow.stream.duct.DuctException;
import cascading.flow.stream.element.InputSource;
import cascading.flow.stream.element.SpliceGate;
import cascading.flow.stream.graph.IORole;
import cascading.flow.stream.graph.StreamGraph;
import cascading.pipe.Pipe;
import cascading.pipe.Splice;
import cascading.tap.hadoop.util.MeasuredOutputCollector;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import cascading.tuple.io.KeyTuple;
import cascading.tuple.io.ValueTuple;
import cascading.tuple.util.Resettable1;
import cascading.util.SortedListMultiMap;
import cascading.util.Util;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.library.api.KeyValueReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link SpliceGate} that moves tuples across Tez logical inputs and outputs.
 * <p>
 * An instance is created in one of two ways:
 * <ul>
 * <li>with a collection of {@link LogicalOutput} instances, in which case every tuple handed to
 * {@link #receive(Duct, int, TupleEntry)} is written as a key (paired with {@link ValueTuple#NULL})
 * to each of the outputs;</li>
 * <li>with a map of {@link LogicalInput} instances, in which case {@link #run(Object)} reads keys
 * from the first input and forwards each one to the next duct.</li>
 * </ul>
 */
public class TezMergeGate extends SpliceGate<TupleEntry, TupleEntry> implements InputSource
  {
  private static final Logger LOG = LoggerFactory.getLogger( TezMergeGate.class );

  /** Outputs written to by {@link #receive(Duct, int, TupleEntry)}; null when constructed with inputs. */
  protected Collection<LogicalOutput> logicalOutputs;
  /** Inputs read by {@link #map()}; null when constructed with outputs. */
  protected SortedListMultiMap<Integer, LogicalInput> logicalInputs;

  /** Collector wrapping the logical outputs; only assigned in {@link #prepare()} when the role is not {@code source}. */
  private MeasuredOutputCollector collector;
  /** Reusable entry that carries each key read in {@link #map()} to the next duct. */
  private TupleEntry valueEntry;

  /** Reusable key wrapper, reset with the incoming tuple on every {@link #receive(Duct, int, TupleEntry)} call. */
  private final Resettable1<Tuple> keyTuple = new KeyTuple();

  /**
   * Creates a gate that writes to the given logical outputs.
   *
   * @param flowProcess    the current flow process
   * @param splice         the splice this gate represents
   * @param role           the IO role of this gate
   * @param logicalOutputs the outputs to write to, must not be null or empty
   * @throws IllegalArgumentException if {@code logicalOutputs} is null or empty
   */
  public TezMergeGate( FlowProcess flowProcess, Splice splice, IORole role, Collection<LogicalOutput> logicalOutputs )
    {
    super( flowProcess, splice, role );

    if( logicalOutputs == null || logicalOutputs.isEmpty() )
      throw new IllegalArgumentException( "output must not be null or empty" );

    this.logicalOutputs = logicalOutputs;
    }

  /**
   * Creates a gate that reads from the given logical inputs.
   * <p>
   * The map may hold several entries, but they must all refer to the same {@link LogicalInput} instance.
   *
   * @param flowProcess   the current flow process
   * @param splice        the splice this gate represents
   * @param role          the IO role of this gate
   * @param logicalInputs the inputs to read from, must not be null or empty
   * @throws IllegalArgumentException if {@code logicalInputs} is null, has no keys, or does not resolve
   *                                  to exactly one distinct input
   */
  public TezMergeGate( FlowProcess flowProcess, Splice splice, IORole role, SortedListMultiMap<Integer, LogicalInput> logicalInputs )
    {
    super( flowProcess, splice, role );

    if( logicalInputs == null || logicalInputs.getKeys().size() == 0 )
      throw new IllegalArgumentException( "inputs must not be null or empty" );

    // de-duplicate, the same input instance may be registered more than once
    Set<LogicalInput> inputs = new HashSet<>( logicalInputs.getValues() );

    if( inputs.size() != 1 )
      throw new IllegalArgumentException( "only supports a single input" );

    this.logicalInputs = logicalInputs;
    }

  /**
   * Initializes the parent gate, then creates the reusable value entry from the out-values fields
   * of the first outgoing scope.
   */
  @Override
  public void initialize()
    {
    super.initialize();

    Scope outgoingScope = Util.getFirst( outgoingScopes );
    valueEntry = new TupleEntry( outgoingScope.getOutValuesFields(), true );
    }

  /**
   * Resolves the next duct from the stream graph, unless this gate has the {@code sink} role.
   *
   * @param streamGraph the graph this gate is a member of
   */
  @Override
  public void bind( StreamGraph streamGraph )
    {
    if( role != IORole.sink )
      next = getNextFor( streamGraph );
    }

  /**
   * Starts every logical input and output this gate was given, then creates the output collector
   * when the role is not {@code source}.
   *
   * @throws CascadingException if starting any input or output fails
   */
  @Override
  public void prepare()
    {
    try
      {
      if( logicalInputs != null )
        {
        // resolved once, the values and their count do not change while starting the inputs
        Collection<LogicalInput> inputs = logicalInputs.getValues();
        int inputCount = inputs.size();

        for( LogicalInput logicalInput : inputs )
          {
          LOG.info( "calling {}#start() on: {} {}, for {} inputs", logicalInput.getClass().getSimpleName(), getSplice(), Pipe.id( getSplice() ), inputCount );

          logicalInput.start();
          }
        }

      if( logicalOutputs != null )
        {
        for( LogicalOutput logicalOutput : logicalOutputs )
          {
          LOG.info( "calling {}#start() on: {} {}", logicalOutput.getClass().getSimpleName(), getSplice(), Pipe.id( getSplice() ) );

          logicalOutput.start();
          }
        }
      }
    catch( Exception exception )
      {
      throw new CascadingException( "unable to start input/output", exception );
      }

    if( role != IORole.source )
      collector = new MeasuredOutputCollector( flowProcess, SliceCounters.Write_Duration, createOutputCollector() );

    super.prepare();
    }

  /**
   * Propagates the start signal only when a next duct has been bound.
   *
   * @param previous the duct sending the signal
   */
  @Override
  public void start( Duct previous )
    {
    if( next != null )
      super.start( previous );
    }

  /**
   * Writes the incoming tuple as a key, paired with {@link ValueTuple#NULL}, to the output collector
   * and increments {@link SliceCounters#Tuples_Written}.
   * <p>
   * Failures are not thrown directly, they are routed to the inherited exception handlers.
   *
   * @param previous      the duct sending the entry
   * @param ordinal       the ordinal of the sending duct, not used here
   * @param incomingEntry the entry whose tuple is written
   */
  @Override
  public void receive( Duct previous, int ordinal, TupleEntry incomingEntry )
    {
    try
      {
      keyTuple.reset( incomingEntry.getTuple() );

      collector.collect( keyTuple, ValueTuple.NULL );
      flowProcess.increment( SliceCounters.Tuples_Written, 1 );
      }
    catch( OutOfMemoryError error )
      {
      handleReThrowableException( "out of memory, try increasing task memory allocation", error );
      }
    catch( CascadingException exception )
      {
      handleException( exception, incomingEntry );
      }
    catch( Throwable throwable )
      {
      handleException( new DuctException( "internal error: " + incomingEntry.getTuple().print(), throwable ), incomingEntry );
      }
    }

  /**
   * Propagates the complete signal only when a next duct has been bound.
   *
   * @param previous the duct sending the signal
   */
  @Override
  public void complete( Duct previous )
    {
    if( next != null )
      super.complete( previous );
    }

  /**
   * Runs {@link #map()} and rethrows whatever failure it reports.
   *
   * @param input not used by this implementation
   * @throws Throwable the failure returned by {@link #map()}, if any
   */
  @Override
  public void run( Object input ) throws Throwable
    {
    Throwable throwable = map();

    if( throwable != null )
      throw throwable;
    }

  /**
   * Reads every key from the first logical input and forwards it to the next duct, bracketed by
   * the start and complete signals.
   * <p>
   * Failures are caught and returned rather than thrown. Everything except {@link OutOfMemoryError}
   * is also logged here.
   *
   * @return the failure that interrupted processing, or null when all keys were forwarded
   * @throws Exception declared for overriding implementations, this implementation returns failures instead
   */
  protected Throwable map() throws Exception
    {
    try
      {
      start( this );

      // if multiple ordinals, an input could be duplicated if sourcing multiple paths
      LogicalInput logicalInput = Util.getFirst( logicalInputs.getValues() );

      KeyValueReader reader = (KeyValueReader) logicalInput.getReader();

      while( reader.next() )
        {
        Tuple currentKey = (Tuple) reader.getCurrentKey();

        // only the key is read, the entry instance is reused for every record
        valueEntry.setTuple( currentKey );
        next.receive( this, 0, valueEntry );
        }

      complete( this );
      }
    catch( Throwable throwable )
      {
      if( !( throwable instanceof OutOfMemoryError ) )
        LOG.error( "caught throwable", throwable );

      return throwable;
      }

    return null;
    }

  /**
   * Creates the collector used to write to the logical outputs.
   * <p>
   * A single output is wrapped directly. With several outputs the returned collector writes every
   * key/value pair to each of them, in iteration order.
   *
   * @return a collector writing to all logical outputs
   */
  protected OutputCollector createOutputCollector()
    {
    if( logicalOutputs.size() == 1 )
      return new OldOutputCollector( Util.getFirst( logicalOutputs ) );

    final OutputCollector[] collectors = new OutputCollector[ logicalOutputs.size() ];

    int count = 0;
    for( LogicalOutput logicalOutput : logicalOutputs )
      collectors[ count++ ] = new OldOutputCollector( logicalOutput );

    return new OutputCollector()
      {
      @Override
      public void collect( Object key, Object value ) throws IOException
        {
        for( OutputCollector outputCollector : collectors )
          outputCollector.collect( key, value );
        }
      };
    }
  }