001/*
002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
003 *
004 * Project and contact information: https://cascading.wensel.net/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.flow.tez.stream.element;
022
023import java.util.Collections;
024import java.util.List;
025import java.util.Map;
026
027import cascading.CascadingException;
028import cascading.flow.FlowProcess;
029import cascading.flow.SliceCounters;
030import cascading.flow.hadoop.HadoopCoGroupClosure;
031import cascading.flow.hadoop.util.TimedIterator;
032import cascading.flow.stream.StopDataNotificationException;
033import cascading.flow.stream.duct.DuctException;
034import cascading.flow.stream.graph.IORole;
035import cascading.flow.tez.TezCoGroupClosure;
036import cascading.pipe.CoGroup;
037import cascading.tuple.Tuple;
038import cascading.tuple.io.TuplePair;
039import cascading.util.LogUtil;
040import cascading.util.SortedListMultiMap;
041import org.apache.tez.runtime.api.LogicalInput;
042import org.apache.tez.runtime.api.LogicalOutput;
043import org.apache.tez.runtime.library.api.KeyValuesReader;
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047/**
048 *
049 */
050public class TezCoGroupGate extends TezGroupGate
051  {
052  private static final Logger LOG = LoggerFactory.getLogger( TezCoGroupGate.class );
053
054  protected TimedIterator<Tuple>[] timedIterators;
055
056  public TezCoGroupGate( FlowProcess flowProcess, CoGroup coGroup, IORole role, LogicalOutput logicalOutput )
057    {
058    super( flowProcess, coGroup, role, logicalOutput );
059    }
060
061  public TezCoGroupGate( FlowProcess flowProcess, CoGroup coGroup, IORole role, SortedListMultiMap<Integer, LogicalInput> logicalInputs )
062    {
063    super( flowProcess, coGroup, role, logicalInputs );
064
065    this.timedIterators = new TimedIterator[ logicalInputs.getKeys().size() ];
066
067    for( int i = 0; i < this.timedIterators.length; i++ )
068      this.timedIterators[ i ] = new TimedIterator<>( flowProcess, SliceCounters.Read_Duration, SliceCounters.Tuples_Read, i );
069    }
070
071  @Override
072  protected Throwable reduce() throws Exception
073    {
074    try
075      {
076      start( this );
077
078      SortedListMultiMap<Integer, KeyValuesReader> readers = getKeyValuesReaders();
079      SortedListMultiMap<Tuple, Iterable<Tuple>> iterables = getSortedMultiMap( readers.getKeys().size() );
080
081      Map.Entry<Tuple, List<Iterable<Tuple>>> current = forwardToNext( readers, iterables, null );
082      List<Iterable<Tuple>> currentValues;
083
084      while( current != null )
085        {
086        currentValues = current.getValue();
087
088        for( int i = 0; i < timedIterators.length; i++ )
089          timedIterators[ i ].reset( currentValues.get( i ) );
090
091        try
092          {
093          accept( current.getKey(), timedIterators );
094          }
095        catch( StopDataNotificationException exception )
096          {
097          LogUtil.logWarnOnce( LOG, "received unsupported stop data notification, ignoring: {}", exception.getMessage() );
098          }
099
100        current = forwardToNext( readers, iterables, currentValues );
101        }
102
103      complete( this );
104      }
105    catch( Throwable throwable )
106      {
107      if( !( throwable instanceof OutOfMemoryError ) )
108        LOG.error( "caught throwable", throwable );
109
110      return throwable;
111      }
112
113    return null;
114    }
115
116  private SortedListMultiMap<Integer, KeyValuesReader> getKeyValuesReaders() throws Exception
117    {
118    SortedListMultiMap<Integer, KeyValuesReader> readers = new SortedListMultiMap<>();
119
120    for( Map.Entry<Integer, List<LogicalInput>> entry : logicalInputs.getEntries() )
121      {
122      for( LogicalInput logicalInput : entry.getValue() )
123        readers.put( entry.getKey(), (KeyValuesReader) logicalInput.getReader() );
124      }
125
126    return readers;
127    }
128
129  private Map.Entry<Tuple, List<Iterable<Tuple>>> forwardToNext( SortedListMultiMap<Integer, KeyValuesReader> readers, SortedListMultiMap<Tuple, Iterable<Tuple>> iterables, List<Iterable<Tuple>> current )
130    {
131    try
132      {
133      int size = current == null ? readers.getKeys().size() : current.size();
134
135      for( int ordinal = 0; ordinal < size; ordinal++ )
136        {
137        if( current != null && current.get( ordinal ) == null )
138          continue;
139
140        for( KeyValuesReader reader : readers.getValues( ordinal ) )
141          {
142          if( !reader.next() )
143            continue;
144
145          Tuple currentKey = (Tuple) reader.getCurrentKey();
146
147          if( splice.isSorted() )
148            currentKey = ( (TuplePair) currentKey ).getLhs();
149
150          currentKey = getDelegatedTuple( currentKey ); // applies hasher
151
152          Iterable<Tuple> currentValues = (Iterable) reader.getCurrentValues();
153
154          iterables.set( currentKey, ordinal, currentValues );
155          }
156        }
157      }
158    catch( OutOfMemoryError error )
159      {
160      handleReThrowableException( "out of memory, try increasing task memory allocation", error );
161      }
162    catch( CascadingException exception )
163      {
164      handleException( exception, null );
165      }
166    catch( Throwable throwable )
167      {
168      handleException( new DuctException( "internal error", throwable ), null );
169      }
170
171    return iterables.pollFirstEntry();
172    }
173
174  private SortedListMultiMap<Tuple, Iterable<Tuple>> getSortedMultiMap( final int length )
175    {
176    return new SortedListMultiMap<Tuple, Iterable<Tuple>>( getKeyComparator(), length )
177      {
178      Iterable<Tuple>[] array = new Iterable[ length ];
179
180      @Override
181      protected List createCollection()
182        {
183        List<Iterable<Tuple>> collection = super.createCollection();
184
185        Collections.addAll( collection, array ); // init with nulls
186
187        return collection;
188        }
189      };
190    }
191
192  @Override
193  protected HadoopCoGroupClosure createClosure()
194    {
195    return new TezCoGroupClosure( flowProcess, splice.getNumSelfJoins(), keyFields, valuesFields );
196    }
197
198  @Override
199  protected Tuple unwrapGrouping( Tuple key )
200    {
201    return key;
202    }
203
204  }