001/* 002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved. 003 * 004 * Project and contact information: https://cascading.wensel.net/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.flow.tez.stream.element; 022 023import cascading.flow.FlowProcess; 024import cascading.flow.SliceCounters; 025import cascading.flow.hadoop.HadoopGroupByClosure; 026import cascading.flow.hadoop.util.TimedIterator; 027import cascading.flow.stream.StopDataNotificationException; 028import cascading.flow.stream.graph.IORole; 029import cascading.flow.tez.TezGroupByClosure; 030import cascading.flow.tez.util.SecondarySortKeyValuesReader; 031import cascading.pipe.GroupBy; 032import cascading.tuple.Tuple; 033import cascading.tuple.io.TuplePair; 034import cascading.util.LogUtil; 035import cascading.util.SortedListMultiMap; 036import cascading.util.Util; 037import org.apache.tez.runtime.api.LogicalInput; 038import org.apache.tez.runtime.api.LogicalOutput; 039import org.apache.tez.runtime.library.api.KeyValuesReader; 040import org.slf4j.Logger; 041import org.slf4j.LoggerFactory; 042 043/** 044 * 045 */ 046public class TezGroupByGate extends TezGroupGate 047 { 048 private static final Logger LOG = LoggerFactory.getLogger( TezGroupByGate.class ); 049 050 protected TimedIterator[] timedIterators; 051 052 public TezGroupByGate( FlowProcess flowProcess, GroupBy groupBy, IORole role, LogicalOutput logicalOutput ) 053 { 054 super( flowProcess, groupBy, role, logicalOutput ); 055 } 056 057 public TezGroupByGate( FlowProcess flowProcess, GroupBy groupBy, IORole role, SortedListMultiMap<Integer, LogicalInput> logicalInputs ) 058 { 059 super( flowProcess, groupBy, role, logicalInputs ); 060 061 this.timedIterators = TimedIterator.iterators( new TimedIterator<>( flowProcess, SliceCounters.Read_Duration, SliceCounters.Tuples_Read ) ); 062 } 063 064 protected Throwable reduce() throws Exception 065 { 066 try 067 { 068 start( this ); 069 070 // if multiple ordinals, an input could be duplicated if sourcing multiple paths 071 LogicalInput logicalInput = Util.getFirst( logicalInputs.getValues() ); 072 073 KeyValuesReader reader = (KeyValuesReader) logicalInput.getReader(); 074 075 if( sortFields != null ) 076 reader = new SecondarySortKeyValuesReader( reader, groupComparators[ 0 ] ); 077 078 while( reader.next() ) 079 { 080 Tuple currentKey = (Tuple) reader.getCurrentKey(); // if secondary sorting, is a TuplePair 081 Iterable currentValues = reader.getCurrentValues(); 082 083 timedIterators[ 0 ].reset( currentValues ); 084 085 try 086 { 087 accept( currentKey, timedIterators ); // will unwrap the TuplePair 088 } 089 catch( StopDataNotificationException exception ) 090 { 091 LogUtil.logWarnOnce( LOG, "received unsupported stop data notification, ignoring: {}", exception.getMessage() ); 092 } 093 } 094 095 complete( this ); 096 } 097 catch( Throwable throwable ) 098 { 099 if( !( throwable instanceof OutOfMemoryError ) ) 100 LOG.error( "caught throwable", throwable ); 101 102 return throwable; 103 } 104 105 return null; 106 } 107 108 @Override 109 protected HadoopGroupByClosure createClosure() 110 { 111 return new TezGroupByClosure( flowProcess, keyFields, valuesFields ); 112 } 113 114 @Override 115 protected Tuple unwrapGrouping( Tuple key ) 116 { 117 // copying the lhs key during secondary sorting prevents the key from advancing at the end of the 118 // aggregation iterator 119 return sortFields == null ? key : new Tuple( ( (TuplePair) key ).getLhs() ); 120 } 121 }