001/*
002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
003 *
004 * Project and contact information: https://cascading.wensel.net/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.flow.hadoop;
022
023import java.io.IOException;
024import java.util.Iterator;
025
026import cascading.CascadingException;
027import cascading.flow.FlowException;
028import cascading.flow.FlowNode;
029import cascading.flow.FlowSession;
030import cascading.flow.FlowStep;
031import cascading.flow.Flows;
032import cascading.flow.SliceCounters;
033import cascading.flow.StepCounters;
034import cascading.flow.hadoop.planner.HadoopFlowStepJob;
035import cascading.flow.hadoop.stream.graph.HadoopMapStreamGraph;
036import cascading.flow.hadoop.util.HadoopUtil;
037import cascading.flow.planner.BaseFlowNode;
038import cascading.flow.stream.duct.Duct;
039import cascading.flow.stream.element.ElementDuct;
040import cascading.flow.stream.element.SourceStage;
041import cascading.tap.Tap;
042import org.apache.hadoop.mapred.JobConf;
043import org.apache.hadoop.mapred.MapRunnable;
044import org.apache.hadoop.mapred.OutputCollector;
045import org.apache.hadoop.mapred.RecordReader;
046import org.apache.hadoop.mapred.Reporter;
047import org.slf4j.Logger;
048import org.slf4j.LoggerFactory;
049
050import static cascading.flow.hadoop.util.HadoopMRUtil.readStateFromDistCache;
051import static cascading.flow.hadoop.util.HadoopUtil.deserializeBase64;
052import static cascading.util.LogUtil.logCounters;
053import static cascading.util.LogUtil.logMemory;
054
/**
 * Class FlowMapper is the Hadoop Mapper implementation.
 * <p>
 * It is implemented as a {@link MapRunnable} (rather than a plain {@code Mapper}) so it can pull
 * records from the {@link RecordReader} itself and drive them through the Cascading
 * {@link HadoopMapStreamGraph} pipeline for the current {@link FlowNode}.
 */
public class FlowMapper implements MapRunnable
  {
  private static final Logger LOG = LoggerFactory.getLogger( FlowMapper.class );

  // the node (sub-graph of the pipe assembly) this mapper executes; deserialized in configure()
  private FlowNode flowNode;
  // the stream graph wiring source taps through the node's pipeline; built in configure()
  private HadoopMapStreamGraph streamGraph;
  // flow process wrapping the current JobConf, Reporter and OutputCollector
  private HadoopFlowProcess currentProcess;

  /** Constructor FlowMapper creates a new FlowMapper instance. */
  public FlowMapper()
    {
    }

  /**
   * Initializes this mapper from the given job configuration.
   * <p>
   * Deserializes the current {@link FlowNode} from the raw {@code "cascading.flow.step.node.map"}
   * property, falling back to state read from the distributed cache when that property is absent
   * (presumably because the serialized state was too large to inline — confirm against the planner),
   * then builds the {@link HadoopMapStreamGraph} rooted at the source {@link Tap} identified by
   * {@code "cascading.step.source"}.
   *
   * @param jobConf the Hadoop job configuration for this task
   * @throws CascadingException rethrown as-is if configuration fails with a CascadingException
   * @throws FlowException      wrapping any other failure during configuration
   */
  @Override
  public void configure( JobConf jobConf )
    {
    try
      {
      HadoopUtil.initLog4j( jobConf );

      LOG.info( "cascading version: {}", jobConf.get( "cascading.version", "" ) );
      LOG.info( "child jvm opts: {}", jobConf.get( "mapred.child.java.opts", "" ) );

      currentProcess = new HadoopFlowProcess( new FlowSession(), jobConf, true );

      // serialized node state is inlined in the conf, or read from the dist cache when not present
      String mapNodeState = jobConf.getRaw( "cascading.flow.step.node.map" );

      if( mapNodeState == null )
        mapNodeState = readStateFromDistCache( jobConf, jobConf.get( FlowStep.CASCADING_FLOW_STEP_ID ), "map" );

      flowNode = deserializeBase64( mapNodeState, jobConf, BaseFlowNode.class );

      LOG.info( "flow node id: {}, ordinal: {}", flowNode.getID(), flowNode.getOrdinal() );

      // resolve the tap this mapper streams from, then build the map-side stream graph around it
      Tap source = Flows.getTapForID( flowNode.getSourceTaps(), jobConf.get( "cascading.step.source" ) );

      streamGraph = new HadoopMapStreamGraph( currentProcess, flowNode, source );

      for( Duct head : streamGraph.getHeads() )
        LOG.info( "sourcing from: " + ( (ElementDuct) head ).getFlowElement() );

      for( Duct tail : streamGraph.getTails() )
        LOG.info( "sinking to: " + ( (ElementDuct) tail ).getFlowElement() );

      for( Tap trap : flowNode.getTraps() )
        LOG.info( "trapping to: " + trap );

      logMemory( LOG, "flow node id: " + flowNode.getID() + ", mem on start" );
      }
    catch( Throwable throwable )
      {
      // surface the failure to the local-mode step job before propagating, see reportIfLocal()
      reportIfLocal( throwable );

      if( throwable instanceof CascadingException )
        throw (CascadingException) throwable;

      throw new FlowException( "internal error during mapper configuration", throwable );
      }
    }

  /**
   * Runs the map-side stream graph over all input records.
   * <p>
   * Every head other than the streamed head is run first with no input, then the streamed head
   * consumes the given {@link RecordReader}. Process begin/end/duration counters are recorded and
   * {@link HadoopMapStreamGraph#cleanup()} is invoked regardless of success or failure; the nested
   * finally below guarantees the counters and close-time logs are emitted even if cleanup throws.
   *
   * @param input    the record reader feeding the streamed source
   * @param output   the output collector receiving sink output
   * @param reporter the Hadoop progress reporter
   * @throws IOException        if reading or writing records fails
   * @throws CascadingException rethrown as-is if execution fails with a CascadingException
   * @throws FlowException      wrapping any other failure during execution
   */
  @Override
  public void run( RecordReader input, OutputCollector output, Reporter reporter ) throws IOException
    {
    currentProcess.setReporter( reporter );
    currentProcess.setOutputCollector( output );

    streamGraph.prepare();

    long processBeginTime = System.currentTimeMillis();

    currentProcess.increment( SliceCounters.Process_Begin_Time, processBeginTime );
    currentProcess.increment( StepCounters.Process_Begin_Time, processBeginTime );

    SourceStage streamedHead = streamGraph.getStreamedHead();
    Iterator<Duct> iterator = streamGraph.getHeads().iterator();

    try
      {
      try
        {
        // run all non-streamed heads (with no input) before streaming the record reader
        while( iterator.hasNext() )
          {
          Duct next = iterator.next();

          if( next != streamedHead )
            ( (SourceStage) next ).run( null );
          }

        streamedHead.run( input );
        }
      catch( OutOfMemoryError error )
        {
        // let OOM propagate untouched -- it must not be reported locally or wrapped below
        throw error;
        }
      catch( IOException exception )
        {
        // report, but rethrow unwrapped so Hadoop sees the declared IOException
        reportIfLocal( exception );
        throw exception;
        }
      catch( Throwable throwable )
        {
        reportIfLocal( throwable );

        if( throwable instanceof CascadingException )
          throw (CascadingException) throwable;

        throw new FlowException( "internal error during mapper execution", throwable );
        }
      }
    finally
      {
      try
        {
        streamGraph.cleanup();
        }
      finally
        {
        // always record end/duration counters and close-time logs, even if cleanup() throws
        long processEndTime = System.currentTimeMillis();

        currentProcess.increment( SliceCounters.Process_End_Time, processEndTime );
        currentProcess.increment( SliceCounters.Process_Duration, processEndTime - processBeginTime );
        currentProcess.increment( StepCounters.Process_End_Time, processEndTime );
        currentProcess.increment( StepCounters.Process_Duration, processEndTime - processBeginTime );

        String message = "flow node id: " + flowNode.getID();
        logMemory( LOG, message + ", mem on close" );
        logCounters( LOG, message + ", counter:", currentProcess );
        }
      }
    }

  /**
   * Report the error to HadoopFlowStepJob if we are running in Hadoop's local mode.
   *
   * @param throwable The throwable that was thrown.
   */
  private void reportIfLocal( Throwable throwable )
    {
    if( HadoopUtil.isLocal( currentProcess.getJobConf() ) )
      HadoopFlowStepJob.reportLocalError( throwable );
    }
  }