001/* 002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved. 003 * 004 * Project and contact information: https://cascading.wensel.net/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.flow.hadoop.stream; 022 023import java.util.Iterator; 024 025import cascading.CascadingException; 026import cascading.flow.FlowProcess; 027import cascading.flow.SliceCounters; 028import cascading.flow.hadoop.HadoopGroupByClosure; 029import cascading.flow.stream.duct.Duct; 030import cascading.flow.stream.duct.DuctException; 031import cascading.flow.stream.element.GroupingSpliceGate; 032import cascading.flow.stream.graph.IORole; 033import cascading.flow.stream.graph.StreamGraph; 034import cascading.pipe.Splice; 035import cascading.pipe.joiner.BufferJoin; 036import cascading.tap.hadoop.util.MeasuredOutputCollector; 037import cascading.tuple.Tuple; 038import cascading.tuple.TupleEntry; 039import org.apache.hadoop.mapred.OutputCollector; 040 041/** 042 * 043 */ 044public abstract class HadoopGroupGate extends GroupingSpliceGate 045 { 046 protected HadoopGroupByClosure closure; 047 protected OutputCollector collector; 048 049 private final boolean isBufferJoin; 050 051 public HadoopGroupGate( FlowProcess flowProcess, Splice splice, IORole role ) 052 { 053 super( flowProcess, splice, role ); 054 055 isBufferJoin = splice.getJoiner() instanceof BufferJoin; 056 } 057 058 @Override 059 public void bind( StreamGraph streamGraph ) 060 { 061 if( role != IORole.sink ) 062 next = getNextFor( streamGraph ); 063 } 064 065 @Override 066 public void prepare() 067 { 068 if( role != IORole.source ) 069 collector = new MeasuredOutputCollector( flowProcess, SliceCounters.Write_Duration, createOutputCollector() ); 070 071 if( role != IORole.sink ) 072 closure = createClosure(); 073 074 if( grouping != null && splice.getJoinDeclaredFields() != null && splice.getJoinDeclaredFields().isNone() ) 075 grouping.joinerClosure = closure; 076 } 077 078 protected abstract OutputCollector createOutputCollector(); 079 080 @Override 081 public void start( Duct previous ) 082 { 083 if( next != null ) 084 super.start( previous ); 085 } 086 087 // todo: receive should receive the edge or ordinal so no lookup 088 public void receive( Duct previous, int ordinal, TupleEntry incomingEntry ) 089 { 090 // create a view over the incoming tuple 091 Tuple groupTupleView = keyBuilder[ ordinal ].makeResult( incomingEntry.getTuple(), null ); 092 093 // reset keyTuple via groupTuple or groupSortTuple 094 if( sortFields == null ) 095 groupTuple.reset( groupTupleView ); 096 else 097 groupSortTuple.reset( groupTupleView, sortBuilder[ ordinal ].makeResult( incomingEntry.getTuple(), null ) ); 098 099 valueTuple.reset( valuesBuilder[ ordinal ].makeResult( incomingEntry.getTuple(), null ) ); 100 101 try 102 { 103 // keyTuple is a reference to either groupTuple or groupSortTuple 104 wrapGroupingAndCollect( previous, ordinal, (Tuple) valueTuple, keyTuple ); 105 flowProcess.increment( SliceCounters.Tuples_Written, 1 ); 106 } 107 catch( OutOfMemoryError error ) 108 { 109 handleReThrowableException( "out of memory, try increasing task memory allocation", error ); 110 } 111 catch( CascadingException exception ) 112 { 113 handleException( exception, incomingEntry ); 114 } 115 catch( Throwable throwable ) 116 { 117 handleException( new DuctException( "internal error: " + incomingEntry.getTuple().print(), throwable ), incomingEntry ); 118 } 119 } 120 121 @Override 122 public void complete( Duct previous ) 123 { 124 if( next != null ) 125 super.complete( previous ); 126 } 127 128 public void accept( Tuple key, Iterator<Tuple>[] values ) 129 { 130 key = unwrapGrouping( key ); 131 132 closure.reset( key, values ); 133 134 // Buffer is using JoinerClosure directly 135 if( !isBufferJoin ) 136 tupleEntryIterator.reset( splice.getJoiner().getIterator( closure ) ); 137 else 138 tupleEntryIterator.reset( values ); 139 140 keyEntry.setTuple( closure.getGroupTuple( key ) ); 141 142 next.receive( this, 0, grouping ); 143 } 144 145 protected abstract HadoopGroupByClosure createClosure(); 146 147 protected abstract void wrapGroupingAndCollect( Duct previous, int ordinal, Tuple valuesTuple, Tuple groupKey ) throws java.io.IOException; 148 149 protected abstract Tuple unwrapGrouping( Tuple key ); 150 }