001/* 002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.flow.hadoop; 022 023import java.io.IOException; 024import java.util.Iterator; 025 026import cascading.CascadingException; 027import cascading.flow.FlowException; 028import cascading.flow.FlowNode; 029import cascading.flow.FlowSession; 030import cascading.flow.FlowStep; 031import cascading.flow.Flows; 032import cascading.flow.SliceCounters; 033import cascading.flow.StepCounters; 034import cascading.flow.hadoop.planner.HadoopFlowStepJob; 035import cascading.flow.hadoop.stream.graph.HadoopMapStreamGraph; 036import cascading.flow.hadoop.util.HadoopUtil; 037import cascading.flow.planner.BaseFlowNode; 038import cascading.flow.stream.duct.Duct; 039import cascading.flow.stream.element.ElementDuct; 040import cascading.flow.stream.element.SourceStage; 041import cascading.tap.Tap; 042import org.apache.hadoop.mapred.JobConf; 043import org.apache.hadoop.mapred.MapRunnable; 044import org.apache.hadoop.mapred.OutputCollector; 045import org.apache.hadoop.mapred.RecordReader; 046import org.apache.hadoop.mapred.Reporter; 047import org.slf4j.Logger; 048import org.slf4j.LoggerFactory; 049 050import static cascading.flow.hadoop.util.HadoopMRUtil.readStateFromDistCache; 051import static cascading.flow.hadoop.util.HadoopUtil.deserializeBase64; 052import static cascading.util.LogUtil.logCounters; 053import static cascading.util.LogUtil.logMemory; 054 055/** Class FlowMapper is the Hadoop Mapper implementation. */ 056public class FlowMapper implements MapRunnable 057 { 058 private static final Logger LOG = LoggerFactory.getLogger( FlowMapper.class ); 059 060 private FlowNode flowNode; 061 private HadoopMapStreamGraph streamGraph; 062 private HadoopFlowProcess currentProcess; 063 064 /** Constructor FlowMapper creates a new FlowMapper instance. */ 065 public FlowMapper() 066 { 067 } 068 069 @Override 070 public void configure( JobConf jobConf ) 071 { 072 try 073 { 074 HadoopUtil.initLog4j( jobConf ); 075 076 LOG.info( "cascading version: {}", jobConf.get( "cascading.version", "" ) ); 077 LOG.info( "child jvm opts: {}", jobConf.get( "mapred.child.java.opts", "" ) ); 078 079 currentProcess = new HadoopFlowProcess( new FlowSession(), jobConf, true ); 080 081 String mapNodeState = jobConf.getRaw( "cascading.flow.step.node.map" ); 082 083 if( mapNodeState == null ) 084 mapNodeState = readStateFromDistCache( jobConf, jobConf.get( FlowStep.CASCADING_FLOW_STEP_ID ), "map" ); 085 086 flowNode = deserializeBase64( mapNodeState, jobConf, BaseFlowNode.class ); 087 088 LOG.info( "flow node id: {}, ordinal: {}", flowNode.getID(), flowNode.getOrdinal() ); 089 090 Tap source = Flows.getTapForID( flowNode.getSourceTaps(), jobConf.get( "cascading.step.source" ) ); 091 092 streamGraph = new HadoopMapStreamGraph( currentProcess, flowNode, source ); 093 094 for( Duct head : streamGraph.getHeads() ) 095 LOG.info( "sourcing from: " + ( (ElementDuct) head ).getFlowElement() ); 096 097 for( Duct tail : streamGraph.getTails() ) 098 LOG.info( "sinking to: " + ( (ElementDuct) tail ).getFlowElement() ); 099 100 for( Tap trap : flowNode.getTraps() ) 101 LOG.info( "trapping to: " + trap ); 102 103 logMemory( LOG, "flow node id: " + flowNode.getID() + ", mem on start" ); 104 } 105 catch( Throwable throwable ) 106 { 107 reportIfLocal( throwable ); 108 109 if( throwable instanceof CascadingException ) 110 throw (CascadingException) throwable; 111 112 throw new FlowException( "internal error during mapper configuration", throwable ); 113 } 114 } 115 116 @Override 117 public void run( RecordReader input, OutputCollector output, Reporter reporter ) throws IOException 118 { 119 currentProcess.setReporter( reporter ); 120 currentProcess.setOutputCollector( output ); 121 122 streamGraph.prepare(); 123 124 long processBeginTime = System.currentTimeMillis(); 125 126 currentProcess.increment( SliceCounters.Process_Begin_Time, processBeginTime ); 127 currentProcess.increment( StepCounters.Process_Begin_Time, processBeginTime ); 128 129 SourceStage streamedHead = streamGraph.getStreamedHead(); 130 Iterator<Duct> iterator = streamGraph.getHeads().iterator(); 131 132 try 133 { 134 try 135 { 136 while( iterator.hasNext() ) 137 { 138 Duct next = iterator.next(); 139 140 if( next != streamedHead ) 141 ( (SourceStage) next ).run( null ); 142 } 143 144 streamedHead.run( input ); 145 } 146 catch( OutOfMemoryError error ) 147 { 148 throw error; 149 } 150 catch( IOException exception ) 151 { 152 reportIfLocal( exception ); 153 throw exception; 154 } 155 catch( Throwable throwable ) 156 { 157 reportIfLocal( throwable ); 158 159 if( throwable instanceof CascadingException ) 160 throw (CascadingException) throwable; 161 162 throw new FlowException( "internal error during mapper execution", throwable ); 163 } 164 } 165 finally 166 { 167 try 168 { 169 streamGraph.cleanup(); 170 } 171 finally 172 { 173 long processEndTime = System.currentTimeMillis(); 174 175 currentProcess.increment( SliceCounters.Process_End_Time, processEndTime ); 176 currentProcess.increment( SliceCounters.Process_Duration, processEndTime - processBeginTime ); 177 currentProcess.increment( StepCounters.Process_End_Time, processEndTime ); 178 currentProcess.increment( StepCounters.Process_Duration, processEndTime - processBeginTime ); 179 180 String message = "flow node id: " + flowNode.getID(); 181 logMemory( LOG, message + ", mem on close" ); 182 logCounters( LOG, message + ", counter:", currentProcess ); 183 } 184 } 185 } 186 187 /** 188 * Report the error to HadoopFlowStepJob if we are running in Hadoops local mode. 189 * 190 * @param throwable The throwable that was thrown. 191 */ 192 private void reportIfLocal( Throwable throwable ) 193 { 194 if( HadoopUtil.isLocal( currentProcess.getJobConf() ) ) 195 HadoopFlowStepJob.reportLocalError( throwable ); 196 } 197 }