001/*
002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
003 *
004 * Project and contact information: https://cascading.wensel.net/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.flow.tez.stream.element;
022
023import java.io.IOException;
024import java.util.Collection;
025import java.util.HashSet;
026import java.util.Set;
027
028import cascading.CascadingException;
029import cascading.flow.FlowProcess;
030import cascading.flow.SliceCounters;
031import cascading.flow.planner.Scope;
032import cascading.flow.stream.duct.Duct;
033import cascading.flow.stream.duct.DuctException;
034import cascading.flow.stream.element.InputSource;
035import cascading.flow.stream.element.SpliceGate;
036import cascading.flow.stream.graph.IORole;
037import cascading.flow.stream.graph.StreamGraph;
038import cascading.pipe.Pipe;
039import cascading.pipe.Splice;
040import cascading.tap.hadoop.util.MeasuredOutputCollector;
041import cascading.tuple.Tuple;
042import cascading.tuple.TupleEntry;
043import cascading.tuple.io.KeyTuple;
044import cascading.tuple.io.ValueTuple;
045import cascading.tuple.util.Resettable1;
046import cascading.util.SortedListMultiMap;
047import cascading.util.Util;
048import org.apache.hadoop.mapred.OutputCollector;
049import org.apache.tez.runtime.api.LogicalInput;
050import org.apache.tez.runtime.api.LogicalOutput;
051import org.apache.tez.runtime.library.api.KeyValueReader;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054
055/**
056 *
057 */
058public class TezMergeGate extends SpliceGate<TupleEntry, TupleEntry> implements InputSource
059  {
060  private static final Logger LOG = LoggerFactory.getLogger( TezMergeGate.class );
061
062  protected Collection<LogicalOutput> logicalOutputs;
063  protected SortedListMultiMap<Integer, LogicalInput> logicalInputs;
064
065  private MeasuredOutputCollector collector;
066  private TupleEntry valueEntry;
067
068  private final Resettable1<Tuple> keyTuple = new KeyTuple();
069
070  public TezMergeGate( FlowProcess flowProcess, Splice splice, IORole role, Collection<LogicalOutput> logicalOutputs )
071    {
072    super( flowProcess, splice, role );
073
074    if( logicalOutputs == null || logicalOutputs.isEmpty() )
075      throw new IllegalArgumentException( "output must not be null or empty" );
076
077    this.logicalOutputs = logicalOutputs;
078    }
079
080  public TezMergeGate( FlowProcess flowProcess, Splice splice, IORole role, SortedListMultiMap<Integer, LogicalInput> logicalInputs )
081    {
082    super( flowProcess, splice, role );
083
084    if( logicalInputs == null || logicalInputs.getKeys().size() == 0 )
085      throw new IllegalArgumentException( "inputs must not be null or empty" );
086
087    Set<LogicalInput> inputs = new HashSet<>( logicalInputs.getValues() );
088
089    if( inputs.size() != 1 )
090      throw new IllegalArgumentException( "only supports a single input" );
091
092    this.logicalInputs = logicalInputs;
093    }
094
095  @Override
096  public void initialize()
097    {
098    super.initialize();
099
100    Scope outgoingScope = Util.getFirst( outgoingScopes );
101    valueEntry = new TupleEntry( outgoingScope.getOutValuesFields(), true );
102    }
103
104  @Override
105  public void bind( StreamGraph streamGraph )
106    {
107    if( role != IORole.sink )
108      next = getNextFor( streamGraph );
109    }
110
111  @Override
112  public void prepare()
113    {
114    try
115      {
116      if( logicalInputs != null )
117        {
118        for( LogicalInput logicalInput : logicalInputs.getValues() )
119          {
120          LOG.info( "calling {}#start() on: {} {}, for {} inputs", logicalInput.getClass().getSimpleName(), getSplice(), Pipe.id( getSplice() ), logicalInputs.getValues().size() );
121
122          logicalInput.start();
123          }
124        }
125
126      if( logicalOutputs != null )
127        {
128        for( LogicalOutput logicalOutput : logicalOutputs )
129          {
130          LOG.info( "calling {}#start() on: {} {}", logicalOutput.getClass().getSimpleName(), getSplice(), Pipe.id( getSplice() ) );
131
132          logicalOutput.start();
133          }
134        }
135      }
136    catch( Exception exception )
137      {
138      throw new CascadingException( "unable to start input/output", exception );
139      }
140
141    if( role != IORole.source )
142      collector = new MeasuredOutputCollector( flowProcess, SliceCounters.Write_Duration, createOutputCollector() );
143
144    super.prepare();
145    }
146
147  @Override
148  public void start( Duct previous )
149    {
150    if( next != null )
151      super.start( previous );
152    }
153
154  @Override
155  public void receive( Duct previous, int ordinal, TupleEntry incomingEntry )
156    {
157    try
158      {
159      keyTuple.reset( incomingEntry.getTuple() );
160
161      collector.collect( keyTuple, ValueTuple.NULL );
162      flowProcess.increment( SliceCounters.Tuples_Written, 1 );
163      }
164    catch( OutOfMemoryError error )
165      {
166      handleReThrowableException( "out of memory, try increasing task memory allocation", error );
167      }
168    catch( CascadingException exception )
169      {
170      handleException( exception, incomingEntry );
171      }
172    catch( Throwable throwable )
173      {
174      handleException( new DuctException( "internal error: " + incomingEntry.getTuple().print(), throwable ), incomingEntry );
175      }
176    }
177
178  @Override
179  public void complete( Duct previous )
180    {
181    if( next != null )
182      super.complete( previous );
183    }
184
185  @Override
186  public void run( Object input ) throws Throwable
187    {
188    Throwable throwable = map();
189
190    if( throwable != null )
191      throw throwable;
192    }
193
194  protected Throwable map() throws Exception
195    {
196    Throwable localThrowable = null;
197
198    try
199      {
200      start( this );
201
202      // if multiple ordinals, an input could be duplicated if sourcing multiple paths
203      LogicalInput logicalInput = Util.getFirst( logicalInputs.getValues() );
204
205      KeyValueReader reader = (KeyValueReader) logicalInput.getReader();
206
207      while( reader.next() )
208        {
209        Tuple currentKey = (Tuple) reader.getCurrentKey();
210
211        valueEntry.setTuple( currentKey );
212        next.receive( this, 0, valueEntry );
213        }
214
215      complete( this );
216      }
217    catch( Throwable throwable )
218      {
219      if( !( throwable instanceof OutOfMemoryError ) )
220        LOG.error( "caught throwable", throwable );
221
222      return throwable;
223      }
224
225    return localThrowable;
226    }
227
228  protected OutputCollector createOutputCollector()
229    {
230    if( logicalOutputs.size() == 1 )
231      return new OldOutputCollector( Util.getFirst( logicalOutputs ) );
232
233    final OutputCollector[] collectors = new OutputCollector[ logicalOutputs.size() ];
234
235    int count = 0;
236    for( LogicalOutput logicalOutput : logicalOutputs )
237      collectors[ count++ ] = new OldOutputCollector( logicalOutput );
238
239    return new OutputCollector()
240      {
241      @Override
242      public void collect( Object key, Object value ) throws IOException
243        {
244        for( OutputCollector outputCollector : collectors )
245          outputCollector.collect( key, value );
246        }
247      };
248    }
249  }