001/* 002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved. 003 * 004 * Project and contact information: https://cascading.wensel.net/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.flow.tez.stream.element; 022 023import java.util.Collections; 024import java.util.List; 025import java.util.Map; 026 027import cascading.CascadingException; 028import cascading.flow.FlowProcess; 029import cascading.flow.SliceCounters; 030import cascading.flow.hadoop.HadoopCoGroupClosure; 031import cascading.flow.hadoop.util.TimedIterator; 032import cascading.flow.stream.StopDataNotificationException; 033import cascading.flow.stream.duct.DuctException; 034import cascading.flow.stream.graph.IORole; 035import cascading.flow.tez.TezCoGroupClosure; 036import cascading.pipe.CoGroup; 037import cascading.tuple.Tuple; 038import cascading.tuple.io.TuplePair; 039import cascading.util.LogUtil; 040import cascading.util.SortedListMultiMap; 041import org.apache.tez.runtime.api.LogicalInput; 042import org.apache.tez.runtime.api.LogicalOutput; 043import org.apache.tez.runtime.library.api.KeyValuesReader; 044import org.slf4j.Logger; 045import org.slf4j.LoggerFactory; 046 047/** 048 * 049 */ 050public class TezCoGroupGate extends TezGroupGate 051 { 052 private static final Logger LOG = LoggerFactory.getLogger( TezCoGroupGate.class ); 053 054 protected TimedIterator<Tuple>[] timedIterators; 055 056 public TezCoGroupGate( FlowProcess flowProcess, CoGroup coGroup, IORole role, LogicalOutput logicalOutput ) 057 { 058 super( flowProcess, coGroup, role, logicalOutput ); 059 } 060 061 public TezCoGroupGate( FlowProcess flowProcess, CoGroup coGroup, IORole role, SortedListMultiMap<Integer, LogicalInput> logicalInputs ) 062 { 063 super( flowProcess, coGroup, role, logicalInputs ); 064 065 this.timedIterators = new TimedIterator[ logicalInputs.getKeys().size() ]; 066 067 for( int i = 0; i < this.timedIterators.length; i++ ) 068 this.timedIterators[ i ] = new TimedIterator<>( flowProcess, SliceCounters.Read_Duration, SliceCounters.Tuples_Read, i ); 069 } 070 071 @Override 072 protected Throwable reduce() throws Exception 073 { 074 try 075 { 076 start( this ); 077 078 SortedListMultiMap<Integer, KeyValuesReader> readers = getKeyValuesReaders(); 079 SortedListMultiMap<Tuple, Iterable<Tuple>> iterables = getSortedMultiMap( readers.getKeys().size() ); 080 081 Map.Entry<Tuple, List<Iterable<Tuple>>> current = forwardToNext( readers, iterables, null ); 082 List<Iterable<Tuple>> currentValues; 083 084 while( current != null ) 085 { 086 currentValues = current.getValue(); 087 088 for( int i = 0; i < timedIterators.length; i++ ) 089 timedIterators[ i ].reset( currentValues.get( i ) ); 090 091 try 092 { 093 accept( current.getKey(), timedIterators ); 094 } 095 catch( StopDataNotificationException exception ) 096 { 097 LogUtil.logWarnOnce( LOG, "received unsupported stop data notification, ignoring: {}", exception.getMessage() ); 098 } 099 100 current = forwardToNext( readers, iterables, currentValues ); 101 } 102 103 complete( this ); 104 } 105 catch( Throwable throwable ) 106 { 107 if( !( throwable instanceof OutOfMemoryError ) ) 108 LOG.error( "caught throwable", throwable ); 109 110 return throwable; 111 } 112 113 return null; 114 } 115 116 private SortedListMultiMap<Integer, KeyValuesReader> getKeyValuesReaders() throws Exception 117 { 118 SortedListMultiMap<Integer, KeyValuesReader> readers = new SortedListMultiMap<>(); 119 120 for( Map.Entry<Integer, List<LogicalInput>> entry : logicalInputs.getEntries() ) 121 { 122 for( LogicalInput logicalInput : entry.getValue() ) 123 readers.put( entry.getKey(), (KeyValuesReader) logicalInput.getReader() ); 124 } 125 126 return readers; 127 } 128 129 private Map.Entry<Tuple, List<Iterable<Tuple>>> forwardToNext( SortedListMultiMap<Integer, KeyValuesReader> readers, SortedListMultiMap<Tuple, Iterable<Tuple>> iterables, List<Iterable<Tuple>> current ) 130 { 131 try 132 { 133 int size = current == null ? readers.getKeys().size() : current.size(); 134 135 for( int ordinal = 0; ordinal < size; ordinal++ ) 136 { 137 if( current != null && current.get( ordinal ) == null ) 138 continue; 139 140 for( KeyValuesReader reader : readers.getValues( ordinal ) ) 141 { 142 if( !reader.next() ) 143 continue; 144 145 Tuple currentKey = (Tuple) reader.getCurrentKey(); 146 147 if( splice.isSorted() ) 148 currentKey = ( (TuplePair) currentKey ).getLhs(); 149 150 currentKey = getDelegatedTuple( currentKey ); // applies hasher 151 152 Iterable<Tuple> currentValues = (Iterable) reader.getCurrentValues(); 153 154 iterables.set( currentKey, ordinal, currentValues ); 155 } 156 } 157 } 158 catch( OutOfMemoryError error ) 159 { 160 handleReThrowableException( "out of memory, try increasing task memory allocation", error ); 161 } 162 catch( CascadingException exception ) 163 { 164 handleException( exception, null ); 165 } 166 catch( Throwable throwable ) 167 { 168 handleException( new DuctException( "internal error", throwable ), null ); 169 } 170 171 return iterables.pollFirstEntry(); 172 } 173 174 private SortedListMultiMap<Tuple, Iterable<Tuple>> getSortedMultiMap( final int length ) 175 { 176 return new SortedListMultiMap<Tuple, Iterable<Tuple>>( getKeyComparator(), length ) 177 { 178 Iterable<Tuple>[] array = new Iterable[ length ]; 179 180 @Override 181 protected List createCollection() 182 { 183 List<Iterable<Tuple>> collection = super.createCollection(); 184 185 Collections.addAll( collection, array ); // init with nulls 186 187 return collection; 188 } 189 }; 190 } 191 192 @Override 193 protected HadoopCoGroupClosure createClosure() 194 { 195 return new TezCoGroupClosure( flowProcess, splice.getNumSelfJoins(), keyFields, valuesFields ); 196 } 197 198 @Override 199 protected Tuple unwrapGrouping( Tuple key ) 200 { 201 return key; 202 } 203 204 }