001/*
002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
003 *
004 * Project and contact information: https://cascading.wensel.net/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.flow.hadoop.stream;
022
023import java.util.Iterator;
024
025import cascading.CascadingException;
026import cascading.flow.FlowProcess;
027import cascading.flow.SliceCounters;
028import cascading.flow.hadoop.HadoopGroupByClosure;
029import cascading.flow.stream.duct.Duct;
030import cascading.flow.stream.duct.DuctException;
031import cascading.flow.stream.element.GroupingSpliceGate;
032import cascading.flow.stream.graph.IORole;
033import cascading.flow.stream.graph.StreamGraph;
034import cascading.pipe.Splice;
035import cascading.pipe.joiner.BufferJoin;
036import cascading.tap.hadoop.util.MeasuredOutputCollector;
037import cascading.tuple.Tuple;
038import cascading.tuple.TupleEntry;
039import org.apache.hadoop.mapred.OutputCollector;
040
041/**
042 *
043 */
044public abstract class HadoopGroupGate extends GroupingSpliceGate
045  {
046  protected HadoopGroupByClosure closure;
047  protected OutputCollector collector;
048
049  private final boolean isBufferJoin;
050
051  public HadoopGroupGate( FlowProcess flowProcess, Splice splice, IORole role )
052    {
053    super( flowProcess, splice, role );
054
055    isBufferJoin = splice.getJoiner() instanceof BufferJoin;
056    }
057
058  @Override
059  public void bind( StreamGraph streamGraph )
060    {
061    if( role != IORole.sink )
062      next = getNextFor( streamGraph );
063    }
064
065  @Override
066  public void prepare()
067    {
068    if( role != IORole.source )
069      collector = new MeasuredOutputCollector( flowProcess, SliceCounters.Write_Duration, createOutputCollector() );
070
071    if( role != IORole.sink )
072      closure = createClosure();
073
074    if( grouping != null && splice.getJoinDeclaredFields() != null && splice.getJoinDeclaredFields().isNone() )
075      grouping.joinerClosure = closure;
076    }
077
078  protected abstract OutputCollector createOutputCollector();
079
080  @Override
081  public void start( Duct previous )
082    {
083    if( next != null )
084      super.start( previous );
085    }
086
087  // todo: receive should receive the edge or ordinal so no lookup
088  public void receive( Duct previous, int ordinal, TupleEntry incomingEntry )
089    {
090    // create a view over the incoming tuple
091    Tuple groupTupleView = keyBuilder[ ordinal ].makeResult( incomingEntry.getTuple(), null );
092
093    // reset keyTuple via groupTuple or groupSortTuple
094    if( sortFields == null )
095      groupTuple.reset( groupTupleView );
096    else
097      groupSortTuple.reset( groupTupleView, sortBuilder[ ordinal ].makeResult( incomingEntry.getTuple(), null ) );
098
099    valueTuple.reset( valuesBuilder[ ordinal ].makeResult( incomingEntry.getTuple(), null ) );
100
101    try
102      {
103      // keyTuple is a reference to either groupTuple or groupSortTuple
104      wrapGroupingAndCollect( previous, ordinal, (Tuple) valueTuple, keyTuple );
105      flowProcess.increment( SliceCounters.Tuples_Written, 1 );
106      }
107    catch( OutOfMemoryError error )
108      {
109      handleReThrowableException( "out of memory, try increasing task memory allocation", error );
110      }
111    catch( CascadingException exception )
112      {
113      handleException( exception, incomingEntry );
114      }
115    catch( Throwable throwable )
116      {
117      handleException( new DuctException( "internal error: " + incomingEntry.getTuple().print(), throwable ), incomingEntry );
118      }
119    }
120
121  @Override
122  public void complete( Duct previous )
123    {
124    if( next != null )
125      super.complete( previous );
126    }
127
128  public void accept( Tuple key, Iterator<Tuple>[] values )
129    {
130    key = unwrapGrouping( key );
131
132    closure.reset( key, values );
133
134    // Buffer is using JoinerClosure directly
135    if( !isBufferJoin )
136      tupleEntryIterator.reset( splice.getJoiner().getIterator( closure ) );
137    else
138      tupleEntryIterator.reset( values );
139
140    keyEntry.setTuple( closure.getGroupTuple( key ) );
141
142    next.receive( this, 0, grouping );
143    }
144
145  protected abstract HadoopGroupByClosure createClosure();
146
147  protected abstract void wrapGroupingAndCollect( Duct previous, int ordinal, Tuple valuesTuple, Tuple groupKey ) throws java.io.IOException;
148
149  protected abstract Tuple unwrapGrouping( Tuple key );
150  }