001/*
002 * Copyright (c) 2016-2018 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.flow.hadoop.util;
023
024import java.io.IOException;
025import java.io.UnsupportedEncodingException;
026import java.lang.reflect.Constructor;
027import java.lang.reflect.Field;
028import java.lang.reflect.InvocationTargetException;
029import java.net.URI;
030import java.net.URL;
031import java.util.Collection;
032import java.util.Collections;
033import java.util.HashMap;
034import java.util.HashSet;
035import java.util.Iterator;
036import java.util.List;
037import java.util.Map;
038import java.util.Properties;
039import java.util.Set;
040import java.util.jar.Attributes;
041import java.util.jar.Manifest;
042
043import cascading.CascadingException;
044import cascading.flow.FlowException;
045import cascading.flow.planner.BaseFlowStep;
046import cascading.flow.planner.PlatformInfo;
047import cascading.flow.planner.Scope;
048import cascading.pipe.Group;
049import cascading.scheme.hadoop.TextLine;
050import cascading.tap.hadoop.Hfs;
051import cascading.tuple.Fields;
052import cascading.util.LogUtil;
053import cascading.util.Util;
054import org.apache.commons.codec.binary.Base64;
055import org.apache.hadoop.conf.Configurable;
056import org.apache.hadoop.conf.Configuration;
057import org.apache.hadoop.fs.FileStatus;
058import org.apache.hadoop.fs.FileSystem;
059import org.apache.hadoop.fs.LocalFileSystem;
060import org.apache.hadoop.fs.Path;
061import org.apache.hadoop.mapred.JobConf;
062import org.apache.hadoop.util.StringUtils;
063import org.slf4j.Logger;
064import org.slf4j.LoggerFactory;
065
066import static cascading.util.Util.invokeInstanceMethod;
067
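/**
 * HadoopUtil is a collection of static helper methods for working with Hadoop {@link Configuration} and
 * {@link JobConf} instances: copying and merging configurations, Base64 object serialization, classpath
 * path resolution and syncing, platform detection, and input/output path handling.
 */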
071public class HadoopUtil
072  {
073  public static final String CASCADING_FLOW_EXECUTING = "cascading.flow.executing";
074
075  private static final Logger LOG = LoggerFactory.getLogger( HadoopUtil.class );
076  private static final String ENCODING = "US-ASCII";
077  private static final Class<?> DEFAULT_OBJECT_SERIALIZER = JavaObjectSerializer.class;
078
079  private static PlatformInfo platformInfo;
080
081  public static void setIsInflow( Configuration conf )
082    {
083    conf.setBoolean( CASCADING_FLOW_EXECUTING, true );
084    }
085
086  public static boolean isInflow( Configuration conf )
087    {
088    return conf.getBoolean( CASCADING_FLOW_EXECUTING, false );
089    }
090
091  public static void initLog4j( JobConf configuration )
092    {
093    initLog4j( (Configuration) configuration );
094    }
095
096  public static void initLog4j( Configuration configuration )
097    {
098    String values = configuration.get( "log4j.logger", null );
099
100    if( values == null || values.length() == 0 )
101      return;
102
103    if( !Util.hasClass( "org.apache.log4j.Logger" ) )
104      {
105      LOG.info( "org.apache.log4j.Logger is not in the current CLASSPATH, not setting log4j.logger properties" );
106      return;
107      }
108
109    String[] elements = values.split( "," );
110
111    for( String element : elements )
112      LogUtil.setLog4jLevel( element.split( "=" ) );
113    }
114
115  // only place JobConf should ever be returned
116  public static JobConf asJobConfInstance( Configuration configuration )
117    {
118    if( configuration instanceof JobConf )
119      return (JobConf) configuration;
120
121    return new JobConf( configuration );
122    }
123
124  public static <C> C copyJobConf( C parentJobConf )
125    {
126    return copyConfiguration( parentJobConf );
127    }
128
129  public static JobConf copyJobConf( JobConf parentJobConf )
130    {
131    if( parentJobConf == null )
132      throw new IllegalArgumentException( "parent may not be null" );
133
134    // see https://github.com/Cascading/cascading/pull/21
135    // The JobConf(JobConf) constructor causes derived JobConfs to share Credentials. We want to avoid this, in
136    // case those Credentials are mutated later on down the road (which they will be, during job submission, in
137    // separate threads!). Using the JobConf(Configuration) constructor avoids Credentials-sharing.
138    final Configuration configurationCopy = new Configuration( parentJobConf );
139    final JobConf jobConf = new JobConf( configurationCopy );
140
141    jobConf.getCredentials().addAll( parentJobConf.getCredentials() );
142
143    return jobConf;
144    }
145
146  public static JobConf createJobConf( Map<Object, Object> properties )
147    {
148    return createJobConf( properties, null );
149    }
150
151  public static JobConf createJobConf( Map<Object, Object> properties, JobConf defaultJobconf )
152    {
153    JobConf jobConf = defaultJobconf == null ? new JobConf() : copyJobConf( defaultJobconf );
154
155    if( properties == null )
156      return jobConf;
157
158    return copyConfiguration( properties, jobConf );
159    }
160
161  public static <C> C copyConfiguration( C parent )
162    {
163    if( parent == null )
164      throw new IllegalArgumentException( "parent may not be null" );
165
166    if( !( parent instanceof Configuration ) )
167      throw new IllegalArgumentException( "parent must be of type Configuration" );
168
169    Configuration conf = (Configuration) parent;
170
171    // see https://github.com/Cascading/cascading/pull/21
172    // The JobConf(JobConf) constructor causes derived JobConfs to share Credentials. We want to avoid this, in
173    // case those Credentials are mutated later on down the road (which they will be, during job submission, in
174    // separate threads!). Using the JobConf(Configuration) constructor avoids Credentials-sharing.
175    Configuration configurationCopy = new Configuration( conf );
176
177    Configuration copiedConf = callCopyConstructor( parent.getClass(), configurationCopy );
178
179    if( Util.hasInstanceMethod( parent, "getCredentials", null ) )
180      {
181      Object result = invokeInstanceMethod( parent, "getCredentials", null, null );
182      Object credentials = invokeInstanceMethod( copiedConf, "getCredentials", null, null );
183
184      invokeInstanceMethod( credentials, "addAll", new Object[]{result}, new Class[]{credentials.getClass()} );
185      }
186
187    return (C) copiedConf;
188    }
189
190  protected static <C extends Configuration> C callCopyConstructor( Class type, Configuration parent )
191    {
192    try
193      {
194      Constructor<C> constructor = type.getConstructor( parent.getClass() );
195
196      return constructor.newInstance( parent );
197      }
198    catch( NoSuchMethodException | InvocationTargetException | InstantiationException | IllegalAccessException exception )
199      {
200      throw new CascadingException( "unable to create copy of: " + type );
201      }
202    }
203
204  public static <C extends Configuration> C copyConfiguration( Map<Object, Object> srcProperties, C dstConfiguration )
205    {
206    Set<Object> keys = new HashSet<Object>( srcProperties.keySet() );
207
208    // keys will only be grabbed if both key/value are String, so keep orig keys
209    if( srcProperties instanceof Properties )
210      keys.addAll( ( (Properties) srcProperties ).stringPropertyNames() );
211
212    for( Object key : keys )
213      {
214      Object value = srcProperties.get( key );
215
216      if( value == null && srcProperties instanceof Properties && key instanceof String )
217        value = ( (Properties) srcProperties ).getProperty( (String) key );
218
219      if( value == null ) // don't stuff null values
220        continue;
221
222      // don't let these objects pass, even though toString is called below.
223      if( value instanceof Class || value instanceof JobConf )
224        continue;
225
226      dstConfiguration.set( key.toString(), value.toString() );
227      }
228
229    return dstConfiguration;
230    }
231
232  public static Map<Object, Object> createProperties( Configuration jobConf )
233    {
234    Map<Object, Object> properties = new HashMap<Object, Object>();
235
236    if( jobConf == null )
237      return properties;
238
239    for( Map.Entry<String, String> entry : jobConf )
240      properties.put( entry.getKey(), entry.getValue() );
241
242    return properties;
243    }
244
245  public static Thread getHDFSShutdownHook()
246    {
247    Exception caughtException;
248
249    try
250      {
251      // we must init the FS so the finalizer is registered
252      FileSystem.getLocal( new JobConf() );
253
254      Field field = FileSystem.class.getDeclaredField( "clientFinalizer" );
255      field.setAccessible( true );
256
257      Thread finalizer = (Thread) field.get( null );
258
259      if( finalizer != null )
260        Runtime.getRuntime().removeShutdownHook( finalizer );
261
262      return finalizer;
263      }
264    catch( NoSuchFieldException exception )
265      {
266      caughtException = exception;
267      }
268    catch( IllegalAccessException exception )
269      {
270      caughtException = exception;
271      }
272    catch( IOException exception )
273      {
274      caughtException = exception;
275      }
276
277    LOG.debug( "unable to find and remove client hdfs shutdown hook, received exception: {}", caughtException.getClass().getName() );
278
279    return null;
280    }
281
282  public static String encodeBytes( byte[] bytes )
283    {
284    try
285      {
286      return new String( Base64.encodeBase64( bytes ), ENCODING );
287      }
288    catch( UnsupportedEncodingException exception )
289      {
290      throw new RuntimeException( exception );
291      }
292    }
293
294  public static byte[] decodeBytes( String string )
295    {
296    try
297      {
298      byte[] bytes = string.getBytes( ENCODING );
299      return Base64.decodeBase64( bytes );
300      }
301    catch( UnsupportedEncodingException exception )
302      {
303      throw new RuntimeException( exception );
304      }
305    }
306
307  public static <T> ObjectSerializer instantiateSerializer( Configuration conf, Class<T> type ) throws ClassNotFoundException
308    {
309    Class<ObjectSerializer> flowSerializerClass;
310
311    String serializerClassName = conf.get( ObjectSerializer.OBJECT_SERIALIZER_PROPERTY );
312
313    if( serializerClassName == null || serializerClassName.length() == 0 )
314      flowSerializerClass = (Class<ObjectSerializer>) DEFAULT_OBJECT_SERIALIZER;
315    else
316      flowSerializerClass = (Class<ObjectSerializer>) Class.forName( serializerClassName );
317
318    ObjectSerializer objectSerializer;
319
320    try
321      {
322      objectSerializer = flowSerializerClass.newInstance();
323
324      if( objectSerializer instanceof Configurable )
325        ( (Configurable) objectSerializer ).setConf( conf );
326      }
327    catch( Exception exception )
328      {
329      exception.printStackTrace();
330      throw new IllegalArgumentException( "Unable to instantiate serializer \""
331        + flowSerializerClass.getName()
332        + "\" for class: "
333        + type.getName() );
334      }
335
336    if( !objectSerializer.accepts( type ) )
337      throw new IllegalArgumentException( serializerClassName + " won't accept objects of class " + type.toString() );
338
339    return objectSerializer;
340    }
341
342  public static <T> String serializeBase64( T object, Configuration conf ) throws IOException
343    {
344    return serializeBase64( object, conf, true );
345    }
346
347  public static <T> String serializeBase64( T object, Configuration conf, boolean compress ) throws IOException
348    {
349    ObjectSerializer objectSerializer;
350
351    try
352      {
353      objectSerializer = instantiateSerializer( conf, object.getClass() );
354      }
355    catch( ClassNotFoundException exception )
356      {
357      throw new IOException( exception );
358      }
359
360    return encodeBytes( objectSerializer.serialize( object, compress ) );
361    }
362
363  /**
364   * This method deserializes the Base64 encoded String into an Object instance.
365   *
366   * @param string
367   * @return an Object
368   */
369  public static <T> T deserializeBase64( String string, Configuration conf, Class<T> type ) throws IOException
370    {
371    return deserializeBase64( string, conf, type, true );
372    }
373
374  public static <T> T deserializeBase64( String string, Configuration conf, Class<T> type, boolean decompress ) throws IOException
375    {
376    if( string == null || string.length() == 0 )
377      return null;
378
379    ObjectSerializer objectSerializer;
380
381    try
382      {
383      objectSerializer = instantiateSerializer( conf, type );
384      }
385    catch( ClassNotFoundException exception )
386      {
387      throw new IOException( exception );
388      }
389
390    return objectSerializer.deserialize( decodeBytes( string ), type, decompress );
391    }
392
393  public static Class findMainClass( Class defaultType )
394    {
395    return Util.findMainClass( defaultType, "org.apache.hadoop" );
396    }
397
398  public static Map<String, String> getConfig( Configuration defaultConf, Configuration updatedConf )
399    {
400    Map<String, String> configs = new HashMap<String, String>();
401
402    for( Map.Entry<String, String> entry : updatedConf )
403      configs.put( entry.getKey(), entry.getValue() );
404
405    for( Map.Entry<String, String> entry : defaultConf )
406      {
407      if( entry.getValue() == null )
408        continue;
409
410      String updatedValue = configs.get( entry.getKey() );
411
412      // if both null, lets purge from map to save space
413      if( updatedValue == null && entry.getValue() == null )
414        configs.remove( entry.getKey() );
415
416      // if the values are the same, lets also purge from map to save space
417      if( updatedValue != null && updatedValue.equals( entry.getValue() ) )
418        configs.remove( entry.getKey() );
419
420      configs.remove( "mapred.working.dir" );
421      configs.remove( "mapreduce.job.working.dir" ); // hadoop2
422      }
423
424    return configs;
425    }
426
427  public static JobConf[] getJobConfs( Configuration job, List<Map<String, String>> configs )
428    {
429    JobConf[] jobConfs = new JobConf[ configs.size() ];
430
431    for( int i = 0; i < jobConfs.length; i++ )
432      jobConfs[ i ] = (JobConf) mergeConf( job, configs.get( i ), false );
433
434    return jobConfs;
435    }
436
437  public static <J extends Configuration> J mergeConf( J job, Map<String, String> config, boolean directly )
438    {
439    Configuration currentConf = directly ? job : ( job instanceof JobConf ? copyJobConf( (JobConf) job ) : new Configuration( job ) );
440
441    for( String key : config.keySet() )
442      {
443      if( LOG.isDebugEnabled() )
444        LOG.debug( "merging key: {} value: {}", key, config.get( key ) );
445
446      currentConf.set( key, config.get( key ) );
447      }
448
449    return (J) currentConf;
450    }
451
452  public static Configuration removePropertiesFrom( Configuration jobConf, String... keys )
453    {
454    Map<Object, Object> properties = createProperties( jobConf );
455
456    for( String key : keys )
457      properties.remove( key );
458
459    return copyConfiguration( properties, new JobConf() );
460    }
461
462  public static boolean removeStateFromDistCache( Configuration conf, String path ) throws IOException
463    {
464    return new Hfs( new TextLine(), path ).deleteResource( conf );
465    }
466
467  public static PlatformInfo getPlatformInfo()
468    {
469    if( platformInfo == null )
470      platformInfo = getPlatformInfoInternal( JobConf.class, "org/apache/hadoop", "Hadoop" );
471
472    return platformInfo;
473    }
474
475  public static PlatformInfo getPlatformInfo( Class type, String attributePath, String platformName )
476    {
477    if( platformInfo == null )
478      platformInfo = getPlatformInfoInternal( type, attributePath, platformName );
479
480    return platformInfo;
481    }
482
483  public static PlatformInfo createPlatformInfo( Class type, String attributePath, String platformName )
484    {
485    return getPlatformInfoInternal( type, attributePath, platformName );
486    }
487
488  private static PlatformInfo getPlatformInfoInternal( Class type, String attributePath, String platformName )
489    {
490    URL url = type.getResource( type.getSimpleName() + ".class" );
491
492    if( url == null || !url.toString().startsWith( "jar" ) )
493      return new PlatformInfo( platformName, null, null );
494
495    String path = url.toString();
496    path = path.substring( 0, path.lastIndexOf( "!" ) + 1 );
497
498    String manifestPath = path + "/META-INF/MANIFEST.MF";
499    String parsedVersion = Util.findVersion( path.substring( 0, path.length() - 1 ) );
500
501    Manifest manifest;
502
503    try
504      {
505      manifest = new Manifest( new URL( manifestPath ).openStream() );
506      }
507    catch( IOException exception )
508      {
509      LOG.warn( "unable to get manifest from {}: {}", manifestPath, exception.getMessage() );
510
511      return new PlatformInfo( platformName, null, parsedVersion );
512      }
513
514    Attributes attributes = manifest.getAttributes( attributePath );
515
516    if( attributes == null )
517      attributes = manifest.getMainAttributes();
518
519    if( attributes == null )
520      {
521      LOG.debug( "unable to get platform manifest attributes" );
522      return new PlatformInfo( platformName, null, parsedVersion );
523      }
524
525    String vendor = attributes.getValue( "Implementation-Vendor" );
526    String version = attributes.getValue( "Implementation-Version" );
527
528    if( Util.isEmpty( version ) )
529      version = parsedVersion;
530
531    return new PlatformInfo( platformName, vendor, version );
532    }
533
534  /**
535   * Copies paths from one local path to a remote path. If syncTimes is true, both modification and access time are
536   * changed to match the local 'from' path.
537   * <p>
538   * Returns a map of file-name to remote modification times if the remote time is different than the local time.
539   *
540   * @param config
541   * @param commonPaths
542   * @param syncTimes
543   */
544  public static Map<String, Long> syncPaths( Configuration config, Map<Path, Path> commonPaths, boolean syncTimes )
545    {
546    if( commonPaths == null )
547      return Collections.emptyMap();
548
549    Map<String, Long> timestampMap = new HashMap<>();
550
551    Map<Path, Path> copyPaths = getCopyPaths( config, commonPaths ); // tests remote file existence or if stale
552
553    LocalFileSystem localFS = getLocalFS( config );
554    FileSystem remoteFS = getDefaultFS( config );
555
556    for( Map.Entry<Path, Path> entry : copyPaths.entrySet() )
557      {
558      Path localPath = entry.getKey();
559      Path remotePath = entry.getValue();
560
561      try
562        {
563        LOG.info( "copying from: {}, to: {}", localPath, remotePath );
564        remoteFS.copyFromLocalFile( localPath, remotePath );
565
566        if( !syncTimes )
567          {
568          timestampMap.put( remotePath.getName(), remoteFS.getFileStatus( remotePath ).getModificationTime() );
569          continue;
570          }
571        }
572      catch( IOException exception )
573        {
574        throw new FlowException( "unable to copy local: " + localPath + " to remote: " + remotePath, exception );
575        }
576
577      FileStatus localFileStatus = null;
578
579      try
580        {
581        // sync the modified times so we can lazily upload jars to hdfs after job is started
582        // otherwise modified time will be local to hdfs
583        localFileStatus = localFS.getFileStatus( localPath );
584        remoteFS.setTimes( remotePath, localFileStatus.getModificationTime(), -1 ); // don't set the access time
585        }
586      catch( IOException exception )
587        {
588        LOG.info( "unable to set local modification time on remote file: {}, 'dfs.namenode.accesstime.precision' may be set to 0 on HDFS.", remotePath );
589
590        if( localFileStatus != null )
591          timestampMap.put( remotePath.getName(), localFileStatus.getModificationTime() );
592        }
593      }
594
595    return timestampMap;
596    }
597
598  public static Map<Path, Path> getCommonPaths( Map<String, Path> localPaths, Map<String, Path> remotePaths )
599    {
600    Map<Path, Path> commonPaths = new HashMap<Path, Path>();
601
602    for( Map.Entry<String, Path> entry : localPaths.entrySet() )
603      {
604      if( remotePaths.containsKey( entry.getKey() ) )
605        commonPaths.put( entry.getValue(), remotePaths.get( entry.getKey() ) );
606      }
607
608    return commonPaths;
609    }
610
611  private static Map<Path, Path> getCopyPaths( Configuration config, Map<Path, Path> commonPaths )
612    {
613    Map<Path, Path> copyPaths = new HashMap<Path, Path>();
614
615    FileSystem remoteFS = getDefaultFS( config );
616    FileSystem localFS = getLocalFS( config );
617
618    for( Map.Entry<Path, Path> entry : commonPaths.entrySet() )
619      {
620      Path localPath = entry.getKey();
621      Path remotePath = entry.getValue();
622
623      try
624        {
625        boolean localExists = localFS.exists( localPath );
626        boolean remoteExist = remoteFS.exists( remotePath );
627
628        if( localExists && !remoteExist )
629          {
630          copyPaths.put( localPath, remotePath );
631          }
632        else if( localExists )
633          {
634          long localModTime = localFS.getFileStatus( localPath ).getModificationTime();
635          long remoteModTime = remoteFS.getFileStatus( remotePath ).getModificationTime();
636
637          if( localModTime > remoteModTime )
638            copyPaths.put( localPath, remotePath );
639          }
640        }
641      catch( IOException exception )
642        {
643        throw new FlowException( "unable to get handle to underlying filesystem", exception );
644        }
645      }
646
647    return copyPaths;
648    }
649
650  public static void resolvePaths( Configuration config, Collection<String> classpath, String remoteRoot, String resourceSubPath, Map<String, Path> localPaths, Map<String, Path> remotePaths )
651    {
652    FileSystem defaultFS = getDefaultFS( config );
653    FileSystem localFS = getLocalFS( config );
654
655    Path remoteRootPath = new Path( remoteRoot == null ? "./.staging" : remoteRoot );
656
657    if( resourceSubPath != null )
658      remoteRootPath = new Path( remoteRootPath, resourceSubPath );
659
660    remoteRootPath = defaultFS.makeQualified( remoteRootPath );
661
662    boolean defaultIsLocal = defaultFS.equals( localFS );
663
664    for( String stringPath : classpath )
665      {
666      Path path = new Path( stringPath );
667
668      URI uri = path.toUri();
669
670      if( uri.getScheme() == null && !defaultIsLocal ) // we want to sync
671        {
672        Path localPath = localFS.makeQualified( path );
673
674        if( !exists( localFS, localPath ) )
675          throw new FlowException( "path not found: " + localPath );
676
677        String name = localPath.getName();
678
679        if( resourceSubPath != null )
680          name = resourceSubPath + "/" + name;
681
682        localPaths.put( name, localPath );
683        remotePaths.put( name, defaultFS.makeQualified( new Path( remoteRootPath, path.getName() ) ) );
684        }
685      else if( localFS.equals( getFileSystem( config, path ) ) )
686        {
687        if( !exists( localFS, path ) )
688          throw new FlowException( "path not found: " + path );
689
690        Path localPath = localFS.makeQualified( path );
691
692        String name = localPath.getName();
693
694        if( resourceSubPath != null )
695          name = resourceSubPath + "/" + name;
696
697        localPaths.put( name, localPath );
698        }
699      else
700        {
701        if( !exists( defaultFS, path ) )
702          throw new FlowException( "path not found: " + path );
703
704        Path defaultPath = defaultFS.makeQualified( path );
705
706        String name = defaultPath.getName();
707
708        if( resourceSubPath != null )
709          name = resourceSubPath + "/" + name;
710
711        remotePaths.put( name, defaultPath );
712        }
713      }
714    }
715
716  private static boolean exists( FileSystem fileSystem, Path path )
717    {
718    try
719      {
720      return fileSystem.exists( path );
721      }
722    catch( IOException exception )
723      {
724      throw new FlowException( "could not test file exists: " + path );
725      }
726    }
727
728  private static FileSystem getFileSystem( Configuration config, Path path )
729    {
730    try
731      {
732      return path.getFileSystem( config );
733      }
734    catch( IOException exception )
735      {
736      throw new FlowException( "unable to get handle to underlying filesystem", exception );
737      }
738    }
739
740  public static LocalFileSystem getLocalFS( Configuration config )
741    {
742    try
743      {
744      return FileSystem.getLocal( config );
745      }
746    catch( IOException exception )
747      {
748      throw new FlowException( "unable to get handle to underlying filesystem", exception );
749      }
750    }
751
752  public static FileSystem getDefaultFS( Configuration config )
753    {
754    try
755      {
756      return FileSystem.get( config );
757      }
758    catch( IOException exception )
759      {
760      throw new FlowException( "unable to get handle to underlying filesystem", exception );
761      }
762    }
763
764  public static boolean isLocal( Configuration conf )
765    {
766    // hadoop 1.0 and 2.0 use different properties to define local mode: we check the new YARN
767    // property first
768    String frameworkName = conf.get( "mapreduce.framework.name" );
769
770    // we are running on hadoop 2.0 (YARN)
771    if( frameworkName != null )
772      return frameworkName.equals( "local" );
773
774    // for Tez
775    String tezLocal = conf.get( "tez.local.mode" );
776
777    if( tezLocal != null )
778      return tezLocal.equals( "true" );
779
780    // hadoop 1.0: use the old property to determine the local mode
781    String hadoop1 = conf.get( "mapred.job.tracker" );
782
783    if( hadoop1 == null )
784      {
785      LOG.warn( "could not successfully test if Hadoop based platform is in standalone/local mode, no valid properties set, returning false - tests for: mapreduce.framework.name, tez.local.mode, and mapred.job.tracker" );
786      return false;
787      }
788
789    return hadoop1.equals( "local" );
790    }
791
792  public static boolean isYARN( Configuration conf )
793    {
794    return conf.get( "mapreduce.framework.name" ) != null;
795    }
796
797  public static void setLocal( Configuration conf )
798    {
799    // set both properties to local
800    conf.set( "mapred.job.tracker", "local" );
801
802    // yarn
803    conf.set( "mapreduce.framework.name", "local" );
804
805    // tez
806    conf.set( "tez.local.mode", "true" );
807    conf.set( "tez.runtime.optimize.local.fetch", "true" );
808    }
809
810  private static boolean interfaceAssignableFromClassName( Class<?> xface, String className )
811    {
812    if( ( className == null ) || ( xface == null ) )
813      return false;
814
815    try
816      {
817      Class<?> klass = Class.forName( className );
818      if( klass == null )
819        return false;
820
821      if( !xface.isAssignableFrom( klass ) )
822        return false;
823
824      return true;
825      }
826    catch( ClassNotFoundException cnfe )
827      {
828      return false; // let downstream figure it out
829      }
830    }
831
832  public static boolean setNewApi( Configuration conf, String className )
833    {
834    if( className == null ) // silently return and let the error be caught downstream
835      return false;
836
837    boolean isStable = className.startsWith( "org.apache.hadoop.mapred." )
838      || interfaceAssignableFromClassName( org.apache.hadoop.mapred.InputFormat.class, className );
839
840    boolean isNew = className.startsWith( "org.apache.hadoop.mapreduce." )
841      || interfaceAssignableFromClassName( org.apache.hadoop.mapreduce.InputFormat.class, className );
842
843    if( isStable )
844      conf.setBoolean( "mapred.mapper.new-api", false );
845    else if( isNew )
846      conf.setBoolean( "mapred.mapper.new-api", true );
847    else
848      throw new IllegalStateException( "cannot determine if class denotes stable or new api, please set 'mapred.mapper.new-api' to the appropriate value" );
849
850    return true;
851    }
852
853  public static void addInputPaths( Configuration conf, Iterable<Path> paths )
854    {
855    Path workingDirectory = getWorkingDirectory( conf );
856    String dirs = conf.get( "mapred.input.dir" );
857    StringBuilder buffer = new StringBuilder( dirs == null ? "" : dirs );
858
859    for( Path path : paths )
860      {
861      if( !path.isAbsolute() )
862        path = new Path( workingDirectory, path );
863
864      String dirStr = StringUtils.escapeString( path.toString() );
865
866      if( buffer.length() != 0 )
867        buffer.append( ',' );
868
869      buffer.append( dirStr );
870      }
871
872    conf.set( "mapred.input.dir", buffer.toString() );
873    }
874
875  public static void addInputPath( Configuration conf, Path path )
876    {
877    Path workingDirectory = getWorkingDirectory( conf );
878    path = new Path( workingDirectory, path );
879    String dirStr = StringUtils.escapeString( path.toString() );
880    String dirs = conf.get( "mapred.input.dir" );
881    conf.set( "mapred.input.dir", dirs == null ? dirStr :
882      dirs + StringUtils.COMMA_STR + dirStr );
883    }
884
885  public static void setOutputPath( Configuration conf, Path path )
886    {
887    Path workingDirectory = getWorkingDirectory( conf );
888    path = new Path( workingDirectory, path );
889    conf.set( "mapred.output.dir", path.toString() );
890    }
891
892  private static Path getWorkingDirectory( Configuration conf )
893    {
894    String name = conf.get( "mapred.working.dir" );
895    if( name != null )
896      {
897      return new Path( name );
898      }
899    else
900      {
901      try
902        {
903        Path dir = FileSystem.get( conf ).getWorkingDirectory();
904        conf.set( "mapred.working.dir", dir.toString() );
905        return dir;
906        }
907      catch( IOException e )
908        {
909        throw new RuntimeException( e );
910        }
911      }
912    }
913
914  public static Path getOutputPath( Configuration conf )
915    {
916    String name = conf.get( "mapred.output.dir" );
917    return name == null ? null : new Path( name );
918    }
919
920  public static String pack( Object object, Configuration conf )
921    {
922    if( object == null )
923      return "";
924
925    try
926      {
927      return serializeBase64( object, conf, true );
928      }
929    catch( IOException exception )
930      {
931      throw new FlowException( "unable to pack object: " + object.getClass().getCanonicalName(), exception );
932      }
933    }
934
935  public static void addFields( Configuration conf, String property, Map<Integer, Fields> fields )
936    {
937    if( fields == null || fields.isEmpty() )
938      return;
939
940    Map<String, Fields> toPack = new HashMap<>();
941
942    for( Map.Entry<Integer, Fields> entry : fields.entrySet() )
943      toPack.put( entry.getKey().toString(), entry.getValue() );
944
945    conf.set( property, pack( toPack, conf ) );
946    }
947
948  public static Map<Integer, Fields> getFields( Configuration conf, String property ) throws IOException
949    {
950    String value = conf.getRaw( property );
951
952    if( value == null || value.isEmpty() )
953      return Collections.emptyMap();
954
955    Map<String, Fields> map = deserializeBase64( value, conf, Map.class, true );
956    Map<Integer, Fields> result = new HashMap<>();
957
958    for( Map.Entry<String, Fields> entry : map.entrySet() )
959      result.put( Integer.parseInt( entry.getKey() ), entry.getValue() );
960
961    return result;
962    }
963
964  public static void addComparators( Configuration conf, String property, Map<String, Fields> map, BaseFlowStep flowStep, Group group )
965    {
966    Iterator<Fields> fieldsIterator = map.values().iterator();
967
968    if( !fieldsIterator.hasNext() )
969      return;
970
971    Fields fields = fieldsIterator.next();
972
973    if( fields.hasComparators() )
974      {
975      conf.set( property, pack( fields, conf ) );
976      return;
977      }
978
979    // use resolved fields if there are no comparators.
980    Set<Scope> previousScopes = flowStep.getPreviousScopes( group );
981
982    fields = previousScopes.iterator().next().getOutValuesFields();
983
984    if( fields.size() != 0 ) // allows fields.UNKNOWN to be used
985      conf.setInt( property + ".size", fields.size() );
986    }
987
988  public static void addComparators( Configuration conf, String property, Map<String, Fields> map, Fields resolvedFields )
989    {
990    Iterator<Fields> fieldsIterator = map.values().iterator();
991
992    if( !fieldsIterator.hasNext() )
993      return;
994
995    while( fieldsIterator.hasNext() )
996      {
997      Fields fields = fieldsIterator.next();
998
999      if( fields.hasComparators() )
1000        {
1001        conf.set( property, pack( fields, conf ) );
1002        return;
1003        }
1004      }
1005
1006    if( resolvedFields.size() != 0 ) // allows fields.UNKNOWN to be used
1007      conf.setInt( property + ".size", resolvedFields.size() );
1008    }
1009  }