001/*
002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
003 *
004 * Project and contact information: https://cascading.wensel.net/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.flow.tez.stream.element;
022
023import cascading.flow.FlowProcess;
024import cascading.flow.SliceCounters;
025import cascading.flow.hadoop.HadoopGroupByClosure;
026import cascading.flow.hadoop.util.TimedIterator;
027import cascading.flow.stream.StopDataNotificationException;
028import cascading.flow.stream.graph.IORole;
029import cascading.flow.tez.TezGroupByClosure;
030import cascading.flow.tez.util.SecondarySortKeyValuesReader;
031import cascading.pipe.GroupBy;
032import cascading.tuple.Tuple;
033import cascading.tuple.io.TuplePair;
034import cascading.util.LogUtil;
035import cascading.util.SortedListMultiMap;
036import cascading.util.Util;
037import org.apache.tez.runtime.api.LogicalInput;
038import org.apache.tez.runtime.api.LogicalOutput;
039import org.apache.tez.runtime.library.api.KeyValuesReader;
040import org.slf4j.Logger;
041import org.slf4j.LoggerFactory;
042
043/**
044 *
045 */
046public class TezGroupByGate extends TezGroupGate
047  {
048  private static final Logger LOG = LoggerFactory.getLogger( TezGroupByGate.class );
049
050  protected TimedIterator[] timedIterators;
051
052  public TezGroupByGate( FlowProcess flowProcess, GroupBy groupBy, IORole role, LogicalOutput logicalOutput )
053    {
054    super( flowProcess, groupBy, role, logicalOutput );
055    }
056
057  public TezGroupByGate( FlowProcess flowProcess, GroupBy groupBy, IORole role, SortedListMultiMap<Integer, LogicalInput> logicalInputs )
058    {
059    super( flowProcess, groupBy, role, logicalInputs );
060
061    this.timedIterators = TimedIterator.iterators( new TimedIterator<>( flowProcess, SliceCounters.Read_Duration, SliceCounters.Tuples_Read ) );
062    }
063
064  protected Throwable reduce() throws Exception
065    {
066    try
067      {
068      start( this );
069
070      // if multiple ordinals, an input could be duplicated if sourcing multiple paths
071      LogicalInput logicalInput = Util.getFirst( logicalInputs.getValues() );
072
073      KeyValuesReader reader = (KeyValuesReader) logicalInput.getReader();
074
075      if( sortFields != null )
076        reader = new SecondarySortKeyValuesReader( reader, groupComparators[ 0 ] );
077
078      while( reader.next() )
079        {
080        Tuple currentKey = (Tuple) reader.getCurrentKey(); // if secondary sorting, is a TuplePair
081        Iterable currentValues = reader.getCurrentValues();
082
083        timedIterators[ 0 ].reset( currentValues );
084
085        try
086          {
087          accept( currentKey, timedIterators ); // will unwrap the TuplePair
088          }
089        catch( StopDataNotificationException exception )
090          {
091          LogUtil.logWarnOnce( LOG, "received unsupported stop data notification, ignoring: {}", exception.getMessage() );
092          }
093        }
094
095      complete( this );
096      }
097    catch( Throwable throwable )
098      {
099      if( !( throwable instanceof OutOfMemoryError ) )
100        LOG.error( "caught throwable", throwable );
101
102      return throwable;
103      }
104
105    return null;
106    }
107
108  @Override
109  protected HadoopGroupByClosure createClosure()
110    {
111    return new TezGroupByClosure( flowProcess, keyFields, valuesFields );
112    }
113
114  @Override
115  protected Tuple unwrapGrouping( Tuple key )
116    {
117    // copying the lhs key during secondary sorting prevents the key from advancing at the end of the
118    // aggregation iterator
119    return sortFields == null ? key : new Tuple( ( (TuplePair) key ).getLhs() );
120    }
121  }