001/*
002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
003 *
004 * Project and contact information: https://cascading.wensel.net/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.flow.hadoop.util;
022
023import java.io.IOException;
024import java.io.UnsupportedEncodingException;
025import java.lang.reflect.Constructor;
026import java.lang.reflect.Field;
027import java.lang.reflect.InvocationTargetException;
028import java.net.URI;
029import java.net.URL;
030import java.util.Collection;
031import java.util.Collections;
032import java.util.HashMap;
033import java.util.HashSet;
034import java.util.Iterator;
035import java.util.List;
036import java.util.Map;
037import java.util.Properties;
038import java.util.Set;
039import java.util.jar.Attributes;
040import java.util.jar.Manifest;
041
042import cascading.CascadingException;
043import cascading.flow.FlowException;
044import cascading.flow.planner.BaseFlowStep;
045import cascading.flow.planner.PlatformInfo;
046import cascading.flow.planner.Scope;
047import cascading.pipe.Group;
048import cascading.scheme.hadoop.TextLine;
049import cascading.tap.hadoop.Hfs;
050import cascading.tuple.Fields;
051import cascading.util.LogUtil;
052import cascading.util.Util;
053import org.apache.commons.codec.binary.Base64;
054import org.apache.hadoop.conf.Configurable;
055import org.apache.hadoop.conf.Configuration;
056import org.apache.hadoop.fs.FileStatus;
057import org.apache.hadoop.fs.FileSystem;
058import org.apache.hadoop.fs.LocalFileSystem;
059import org.apache.hadoop.fs.Path;
060import org.apache.hadoop.mapred.JobConf;
061import org.apache.hadoop.util.StringUtils;
062import org.slf4j.Logger;
063import org.slf4j.LoggerFactory;
064
065import static cascading.util.Util.invokeInstanceMethod;
066
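/**
 * Static helper methods for working with Hadoop {@link Configuration} and {@link JobConf} instances.
 * <p>
 * Covers copying and merging configurations, Base64 serialization of objects into configuration values,
 * resolving and syncing classpath resources between the local and default file systems, and reading and
 * writing the input/output path properties.
 */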
070public class HadoopUtil
071  {
072  public static final String CASCADING_FLOW_EXECUTING = "cascading.flow.executing";
073
074  private static final Logger LOG = LoggerFactory.getLogger( HadoopUtil.class );
075  private static final String ENCODING = "US-ASCII";
076  private static final Class<?> DEFAULT_OBJECT_SERIALIZER = JavaObjectSerializer.class;
077
078  private static PlatformInfo platformInfo;
079
080  public static void setIsInflow( Configuration conf )
081    {
082    conf.setBoolean( CASCADING_FLOW_EXECUTING, true );
083    }
084
085  public static boolean isInflow( Configuration conf )
086    {
087    return conf.getBoolean( CASCADING_FLOW_EXECUTING, false );
088    }
089
090  public static void initLog4j( JobConf configuration )
091    {
092    initLog4j( (Configuration) configuration );
093    }
094
095  public static void initLog4j( Configuration configuration )
096    {
097    String values = configuration.get( "log4j.logger", null );
098
099    if( values == null || values.length() == 0 )
100      return;
101
102    if( !Util.hasClass( "org.apache.log4j.Logger" ) )
103      {
104      LOG.info( "org.apache.log4j.Logger is not in the current CLASSPATH, not setting log4j.logger properties" );
105      return;
106      }
107
108    String[] elements = values.split( "," );
109
110    for( String element : elements )
111      LogUtil.setLog4jLevel( element.split( "=" ) );
112    }
113
114  // only place JobConf should ever be returned
115  public static JobConf asJobConfInstance( Configuration configuration )
116    {
117    if( configuration instanceof JobConf )
118      return (JobConf) configuration;
119
120    return new JobConf( configuration );
121    }
122
123  public static <C> C copyJobConf( C parentJobConf )
124    {
125    return copyConfiguration( parentJobConf );
126    }
127
128  public static JobConf copyJobConf( JobConf parentJobConf )
129    {
130    if( parentJobConf == null )
131      throw new IllegalArgumentException( "parent may not be null" );
132
133    // see https://github.com/Cascading/cascading/pull/21
134    // The JobConf(JobConf) constructor causes derived JobConfs to share Credentials. We want to avoid this, in
135    // case those Credentials are mutated later on down the road (which they will be, during job submission, in
136    // separate threads!). Using the JobConf(Configuration) constructor avoids Credentials-sharing.
137    final Configuration configurationCopy = new Configuration( parentJobConf );
138    final JobConf jobConf = new JobConf( configurationCopy );
139
140    jobConf.getCredentials().addAll( parentJobConf.getCredentials() );
141
142    return jobConf;
143    }
144
145  public static JobConf createJobConf( Map<Object, Object> properties )
146    {
147    return createJobConf( properties, null );
148    }
149
150  public static JobConf createJobConf( Map<Object, Object> properties, JobConf defaultJobconf )
151    {
152    JobConf jobConf = defaultJobconf == null ? new JobConf() : copyJobConf( defaultJobconf );
153
154    if( properties == null )
155      return jobConf;
156
157    return copyConfiguration( properties, jobConf );
158    }
159
160  public static <C> C copyConfiguration( C parent )
161    {
162    if( parent == null )
163      throw new IllegalArgumentException( "parent may not be null" );
164
165    if( !( parent instanceof Configuration ) )
166      throw new IllegalArgumentException( "parent must be of type Configuration" );
167
168    Configuration conf = (Configuration) parent;
169
170    // see https://github.com/Cascading/cascading/pull/21
171    // The JobConf(JobConf) constructor causes derived JobConfs to share Credentials. We want to avoid this, in
172    // case those Credentials are mutated later on down the road (which they will be, during job submission, in
173    // separate threads!). Using the JobConf(Configuration) constructor avoids Credentials-sharing.
174    Configuration configurationCopy = new Configuration( conf );
175
176    Configuration copiedConf = callCopyConstructor( parent.getClass(), configurationCopy );
177
178    if( Util.hasInstanceMethod( parent, "getCredentials", null ) )
179      {
180      Object result = invokeInstanceMethod( parent, "getCredentials", null, null );
181      Object credentials = invokeInstanceMethod( copiedConf, "getCredentials", null, null );
182
183      invokeInstanceMethod( credentials, "addAll", new Object[]{result}, new Class[]{credentials.getClass()} );
184      }
185
186    return (C) copiedConf;
187    }
188
189  protected static <C extends Configuration> C callCopyConstructor( Class type, Configuration parent )
190    {
191    try
192      {
193      Constructor<C> constructor = type.getConstructor( parent.getClass() );
194
195      return constructor.newInstance( parent );
196      }
197    catch( NoSuchMethodException | InvocationTargetException | InstantiationException | IllegalAccessException exception )
198      {
199      throw new CascadingException( "unable to create copy of: " + type );
200      }
201    }
202
203  public static <C extends Configuration> C copyConfiguration( Map<Object, Object> srcProperties, C dstConfiguration )
204    {
205    Set<Object> keys = new HashSet<Object>( srcProperties.keySet() );
206
207    // keys will only be grabbed if both key/value are String, so keep orig keys
208    if( srcProperties instanceof Properties )
209      keys.addAll( ( (Properties) srcProperties ).stringPropertyNames() );
210
211    for( Object key : keys )
212      {
213      Object value = srcProperties.get( key );
214
215      if( value == null && srcProperties instanceof Properties && key instanceof String )
216        value = ( (Properties) srcProperties ).getProperty( (String) key );
217
218      if( value == null ) // don't stuff null values
219        continue;
220
221      // don't let these objects pass, even though toString is called below.
222      if( value instanceof Class || value instanceof JobConf )
223        continue;
224
225      dstConfiguration.set( key.toString(), value.toString() );
226      }
227
228    return dstConfiguration;
229    }
230
231  public static Map<Object, Object> createProperties( Configuration jobConf )
232    {
233    Map<Object, Object> properties = new HashMap<Object, Object>();
234
235    if( jobConf == null )
236      return properties;
237
238    for( Map.Entry<String, String> entry : jobConf )
239      properties.put( entry.getKey(), entry.getValue() );
240
241    return properties;
242    }
243
244  public static Thread getHDFSShutdownHook()
245    {
246    Exception caughtException;
247
248    try
249      {
250      // we must init the FS so the finalizer is registered
251      FileSystem.getLocal( new JobConf() );
252
253      Field field = FileSystem.class.getDeclaredField( "clientFinalizer" );
254      field.setAccessible( true );
255
256      Thread finalizer = (Thread) field.get( null );
257
258      if( finalizer != null )
259        Runtime.getRuntime().removeShutdownHook( finalizer );
260
261      return finalizer;
262      }
263    catch( NoSuchFieldException exception )
264      {
265      caughtException = exception;
266      }
267    catch( IllegalAccessException exception )
268      {
269      caughtException = exception;
270      }
271    catch( IOException exception )
272      {
273      caughtException = exception;
274      }
275
276    LOG.debug( "unable to find and remove client hdfs shutdown hook, received exception: {}", caughtException.getClass().getName() );
277
278    return null;
279    }
280
281  public static String encodeBytes( byte[] bytes )
282    {
283    try
284      {
285      return new String( Base64.encodeBase64( bytes ), ENCODING );
286      }
287    catch( UnsupportedEncodingException exception )
288      {
289      throw new RuntimeException( exception );
290      }
291    }
292
293  public static byte[] decodeBytes( String string )
294    {
295    try
296      {
297      byte[] bytes = string.getBytes( ENCODING );
298      return Base64.decodeBase64( bytes );
299      }
300    catch( UnsupportedEncodingException exception )
301      {
302      throw new RuntimeException( exception );
303      }
304    }
305
306  public static <T> ObjectSerializer instantiateSerializer( Configuration conf, Class<T> type ) throws ClassNotFoundException
307    {
308    Class<ObjectSerializer> flowSerializerClass;
309
310    String serializerClassName = conf.get( ObjectSerializer.OBJECT_SERIALIZER_PROPERTY );
311
312    if( serializerClassName == null || serializerClassName.length() == 0 )
313      flowSerializerClass = (Class<ObjectSerializer>) DEFAULT_OBJECT_SERIALIZER;
314    else
315      flowSerializerClass = (Class<ObjectSerializer>) Class.forName( serializerClassName );
316
317    ObjectSerializer objectSerializer;
318
319    try
320      {
321      objectSerializer = flowSerializerClass.newInstance();
322
323      if( objectSerializer instanceof Configurable )
324        ( (Configurable) objectSerializer ).setConf( conf );
325      }
326    catch( Exception exception )
327      {
328      exception.printStackTrace();
329      throw new IllegalArgumentException( "Unable to instantiate serializer \""
330        + flowSerializerClass.getName()
331        + "\" for class: "
332        + type.getName() );
333      }
334
335    if( !objectSerializer.accepts( type ) )
336      throw new IllegalArgumentException( serializerClassName + " won't accept objects of class " + type.toString() );
337
338    return objectSerializer;
339    }
340
341  public static <T> String serializeBase64( T object, Configuration conf ) throws IOException
342    {
343    return serializeBase64( object, conf, true );
344    }
345
346  public static <T> String serializeBase64( T object, Configuration conf, boolean compress ) throws IOException
347    {
348    ObjectSerializer objectSerializer;
349
350    try
351      {
352      objectSerializer = instantiateSerializer( conf, object.getClass() );
353      }
354    catch( ClassNotFoundException exception )
355      {
356      throw new IOException( exception );
357      }
358
359    return encodeBytes( objectSerializer.serialize( object, compress ) );
360    }
361
362  /**
363   * This method deserializes the Base64 encoded String into an Object instance.
364   *
365   * @param string
366   * @return an Object
367   */
368  public static <T> T deserializeBase64( String string, Configuration conf, Class<T> type ) throws IOException
369    {
370    return deserializeBase64( string, conf, type, true );
371    }
372
373  public static <T> T deserializeBase64( String string, Configuration conf, Class<T> type, boolean decompress ) throws IOException
374    {
375    if( string == null || string.length() == 0 )
376      return null;
377
378    ObjectSerializer objectSerializer;
379
380    try
381      {
382      objectSerializer = instantiateSerializer( conf, type );
383      }
384    catch( ClassNotFoundException exception )
385      {
386      throw new IOException( exception );
387      }
388
389    return objectSerializer.deserialize( decodeBytes( string ), type, decompress );
390    }
391
392  public static Class findMainClass( Class defaultType )
393    {
394    return Util.findMainClass( defaultType, "org.apache.hadoop" );
395    }
396
397  public static Map<String, String> getConfig( Configuration defaultConf, Configuration updatedConf )
398    {
399    Map<String, String> configs = new HashMap<String, String>();
400
401    for( Map.Entry<String, String> entry : updatedConf )
402      configs.put( entry.getKey(), entry.getValue() );
403
404    for( Map.Entry<String, String> entry : defaultConf )
405      {
406      if( entry.getValue() == null )
407        continue;
408
409      String updatedValue = configs.get( entry.getKey() );
410
411      // if both null, lets purge from map to save space
412      if( updatedValue == null && entry.getValue() == null )
413        configs.remove( entry.getKey() );
414
415      // if the values are the same, lets also purge from map to save space
416      if( updatedValue != null && updatedValue.equals( entry.getValue() ) )
417        configs.remove( entry.getKey() );
418
419      configs.remove( "mapred.working.dir" );
420      configs.remove( "mapreduce.job.working.dir" ); // hadoop2
421      }
422
423    return configs;
424    }
425
426  public static JobConf[] getJobConfs( Configuration job, List<Map<String, String>> configs )
427    {
428    JobConf[] jobConfs = new JobConf[ configs.size() ];
429
430    for( int i = 0; i < jobConfs.length; i++ )
431      jobConfs[ i ] = (JobConf) mergeConf( job, configs.get( i ), false );
432
433    return jobConfs;
434    }
435
436  public static <J extends Configuration> J mergeConf( J job, Map<String, String> config, boolean directly )
437    {
438    Configuration currentConf = directly ? job : ( job instanceof JobConf ? copyJobConf( (JobConf) job ) : new Configuration( job ) );
439
440    for( String key : config.keySet() )
441      {
442      if( LOG.isDebugEnabled() )
443        LOG.debug( "merging key: {} value: {}", key, config.get( key ) );
444
445      currentConf.set( key, config.get( key ) );
446      }
447
448    return (J) currentConf;
449    }
450
451  public static Configuration removePropertiesFrom( Configuration jobConf, String... keys )
452    {
453    Map<Object, Object> properties = createProperties( jobConf );
454
455    for( String key : keys )
456      properties.remove( key );
457
458    return copyConfiguration( properties, new JobConf() );
459    }
460
461  public static boolean removeStateFromDistCache( Configuration conf, String path ) throws IOException
462    {
463    return new Hfs( new TextLine(), path ).deleteResource( conf );
464    }
465
466  public static PlatformInfo getPlatformInfo()
467    {
468    if( platformInfo == null )
469      platformInfo = getPlatformInfoInternal( JobConf.class, "org/apache/hadoop", "Hadoop" );
470
471    return platformInfo;
472    }
473
474  public static PlatformInfo getPlatformInfo( Class type, String attributePath, String platformName )
475    {
476    if( platformInfo == null )
477      platformInfo = getPlatformInfoInternal( type, attributePath, platformName );
478
479    return platformInfo;
480    }
481
482  public static PlatformInfo createPlatformInfo( Class type, String attributePath, String platformName )
483    {
484    return getPlatformInfoInternal( type, attributePath, platformName );
485    }
486
487  private static PlatformInfo getPlatformInfoInternal( Class type, String attributePath, String platformName )
488    {
489    URL url = type.getResource( type.getSimpleName() + ".class" );
490
491    if( url == null || !url.toString().startsWith( "jar" ) )
492      return new PlatformInfo( platformName, null, null );
493
494    String path = url.toString();
495    path = path.substring( 0, path.lastIndexOf( "!" ) + 1 );
496
497    String manifestPath = path + "/META-INF/MANIFEST.MF";
498    String parsedVersion = Util.findVersion( path.substring( 0, path.length() - 1 ) );
499
500    Manifest manifest;
501
502    try
503      {
504      manifest = new Manifest( new URL( manifestPath ).openStream() );
505      }
506    catch( IOException exception )
507      {
508      LOG.warn( "unable to get manifest from {}: {}", manifestPath, exception.getMessage() );
509
510      return new PlatformInfo( platformName, null, parsedVersion );
511      }
512
513    Attributes attributes = manifest.getAttributes( attributePath );
514
515    if( attributes == null )
516      attributes = manifest.getMainAttributes();
517
518    if( attributes == null )
519      {
520      LOG.debug( "unable to get platform manifest attributes" );
521      return new PlatformInfo( platformName, null, parsedVersion );
522      }
523
524    String vendor = attributes.getValue( "Implementation-Vendor" );
525    String version = attributes.getValue( "Implementation-Version" );
526
527    if( Util.isEmpty( version ) )
528      version = parsedVersion;
529
530    return new PlatformInfo( platformName, vendor, version );
531    }
532
533  /**
534   * Copies paths from one local path to a remote path. If syncTimes is true, both modification and access time are
535   * changed to match the local 'from' path.
536   * <p>
537   * Returns a map of file-name to remote modification times if the remote time is different than the local time.
538   *
539   * @param config
540   * @param commonPaths
541   * @param syncTimes
542   */
543  public static Map<String, Long> syncPaths( Configuration config, Map<Path, Path> commonPaths, boolean syncTimes )
544    {
545    if( commonPaths == null )
546      return Collections.emptyMap();
547
548    Map<String, Long> timestampMap = new HashMap<>();
549
550    Map<Path, Path> copyPaths = getCopyPaths( config, commonPaths ); // tests remote file existence or if stale
551
552    LocalFileSystem localFS = getLocalFS( config );
553    FileSystem remoteFS = getDefaultFS( config );
554
555    for( Map.Entry<Path, Path> entry : copyPaths.entrySet() )
556      {
557      Path localPath = entry.getKey();
558      Path remotePath = entry.getValue();
559
560      try
561        {
562        LOG.info( "copying from: {}, to: {}", localPath, remotePath );
563        remoteFS.copyFromLocalFile( localPath, remotePath );
564
565        if( !syncTimes )
566          {
567          timestampMap.put( remotePath.getName(), remoteFS.getFileStatus( remotePath ).getModificationTime() );
568          continue;
569          }
570        }
571      catch( IOException exception )
572        {
573        throw new FlowException( "unable to copy local: " + localPath + " to remote: " + remotePath, exception );
574        }
575
576      FileStatus localFileStatus = null;
577
578      try
579        {
580        // sync the modified times so we can lazily upload jars to hdfs after job is started
581        // otherwise modified time will be local to hdfs
582        localFileStatus = localFS.getFileStatus( localPath );
583        remoteFS.setTimes( remotePath, localFileStatus.getModificationTime(), -1 ); // don't set the access time
584        }
585      catch( IOException exception )
586        {
587        LOG.info( "unable to set local modification time on remote file: {}, 'dfs.namenode.accesstime.precision' may be set to 0 on HDFS.", remotePath );
588
589        if( localFileStatus != null )
590          timestampMap.put( remotePath.getName(), localFileStatus.getModificationTime() );
591        }
592      }
593
594    return timestampMap;
595    }
596
597  public static Map<Path, Path> getCommonPaths( Map<String, Path> localPaths, Map<String, Path> remotePaths )
598    {
599    Map<Path, Path> commonPaths = new HashMap<Path, Path>();
600
601    for( Map.Entry<String, Path> entry : localPaths.entrySet() )
602      {
603      if( remotePaths.containsKey( entry.getKey() ) )
604        commonPaths.put( entry.getValue(), remotePaths.get( entry.getKey() ) );
605      }
606
607    return commonPaths;
608    }
609
610  private static Map<Path, Path> getCopyPaths( Configuration config, Map<Path, Path> commonPaths )
611    {
612    Map<Path, Path> copyPaths = new HashMap<Path, Path>();
613
614    FileSystem remoteFS = getDefaultFS( config );
615    FileSystem localFS = getLocalFS( config );
616
617    for( Map.Entry<Path, Path> entry : commonPaths.entrySet() )
618      {
619      Path localPath = entry.getKey();
620      Path remotePath = entry.getValue();
621
622      try
623        {
624        boolean localExists = localFS.exists( localPath );
625        boolean remoteExist = remoteFS.exists( remotePath );
626
627        if( localExists && !remoteExist )
628          {
629          copyPaths.put( localPath, remotePath );
630          }
631        else if( localExists )
632          {
633          long localModTime = localFS.getFileStatus( localPath ).getModificationTime();
634          long remoteModTime = remoteFS.getFileStatus( remotePath ).getModificationTime();
635
636          if( localModTime > remoteModTime )
637            copyPaths.put( localPath, remotePath );
638          }
639        }
640      catch( IOException exception )
641        {
642        throw new FlowException( "unable to get handle to underlying filesystem", exception );
643        }
644      }
645
646    return copyPaths;
647    }
648
649  public static void resolvePaths( Configuration config, Collection<String> classpath, String remoteRoot, String resourceSubPath, Map<String, Path> localPaths, Map<String, Path> remotePaths )
650    {
651    FileSystem defaultFS = getDefaultFS( config );
652    FileSystem localFS = getLocalFS( config );
653
654    Path remoteRootPath = new Path( remoteRoot == null ? "./.staging" : remoteRoot );
655
656    if( resourceSubPath != null )
657      remoteRootPath = new Path( remoteRootPath, resourceSubPath );
658
659    remoteRootPath = defaultFS.makeQualified( remoteRootPath );
660
661    boolean defaultIsLocal = defaultFS.equals( localFS );
662
663    for( String stringPath : classpath )
664      {
665      Path path = new Path( stringPath );
666
667      URI uri = path.toUri();
668
669      if( uri.getScheme() == null && !defaultIsLocal ) // we want to sync
670        {
671        Path localPath = localFS.makeQualified( path );
672
673        if( !exists( localFS, localPath ) )
674          throw new FlowException( "path not found: " + localPath );
675
676        String name = localPath.getName();
677
678        if( resourceSubPath != null )
679          name = resourceSubPath + "/" + name;
680
681        localPaths.put( name, localPath );
682        remotePaths.put( name, defaultFS.makeQualified( new Path( remoteRootPath, path.getName() ) ) );
683        }
684      else if( localFS.equals( getFileSystem( config, path ) ) )
685        {
686        if( !exists( localFS, path ) )
687          throw new FlowException( "path not found: " + path );
688
689        Path localPath = localFS.makeQualified( path );
690
691        String name = localPath.getName();
692
693        if( resourceSubPath != null )
694          name = resourceSubPath + "/" + name;
695
696        localPaths.put( name, localPath );
697        }
698      else
699        {
700        if( !exists( defaultFS, path ) )
701          throw new FlowException( "path not found: " + path );
702
703        Path defaultPath = defaultFS.makeQualified( path );
704
705        String name = defaultPath.getName();
706
707        if( resourceSubPath != null )
708          name = resourceSubPath + "/" + name;
709
710        remotePaths.put( name, defaultPath );
711        }
712      }
713    }
714
715  private static boolean exists( FileSystem fileSystem, Path path )
716    {
717    try
718      {
719      return fileSystem.exists( path );
720      }
721    catch( IOException exception )
722      {
723      throw new FlowException( "could not test file exists: " + path );
724      }
725    }
726
727  private static FileSystem getFileSystem( Configuration config, Path path )
728    {
729    try
730      {
731      return path.getFileSystem( config );
732      }
733    catch( IOException exception )
734      {
735      throw new FlowException( "unable to get handle to underlying filesystem", exception );
736      }
737    }
738
739  public static LocalFileSystem getLocalFS( Configuration config )
740    {
741    try
742      {
743      return FileSystem.getLocal( config );
744      }
745    catch( IOException exception )
746      {
747      throw new FlowException( "unable to get handle to underlying filesystem", exception );
748      }
749    }
750
751  public static FileSystem getDefaultFS( Configuration config )
752    {
753    try
754      {
755      return FileSystem.get( config );
756      }
757    catch( IOException exception )
758      {
759      throw new FlowException( "unable to get handle to underlying filesystem", exception );
760      }
761    }
762
763  public static boolean isLocal( Configuration conf )
764    {
765    // hadoop 1.0 and 2.0 use different properties to define local mode: we check the new YARN
766    // property first
767    String frameworkName = conf.get( "mapreduce.framework.name" );
768
769    // we are running on hadoop 2.0 (YARN)
770    if( frameworkName != null )
771      return frameworkName.equals( "local" );
772
773    // for Tez
774    String tezLocal = conf.get( "tez.local.mode" );
775
776    if( tezLocal != null )
777      return tezLocal.equals( "true" );
778
779    // hadoop 1.0: use the old property to determine the local mode
780    String hadoop1 = conf.get( "mapred.job.tracker" );
781
782    if( hadoop1 == null )
783      {
784      LOG.warn( "could not successfully test if Hadoop based platform is in standalone/local mode, no valid properties set, returning false - tests for: mapreduce.framework.name, tez.local.mode, and mapred.job.tracker" );
785      return false;
786      }
787
788    return hadoop1.equals( "local" );
789    }
790
791  public static boolean isYARN( Configuration conf )
792    {
793    return conf.get( "mapreduce.framework.name" ) != null;
794    }
795
796  public static void setLocal( Configuration conf )
797    {
798    // set both properties to local
799    conf.set( "mapred.job.tracker", "local" );
800
801    // yarn
802    conf.set( "mapreduce.framework.name", "local" );
803
804    // tez
805    conf.set( "tez.local.mode", "true" );
806    conf.set( "tez.runtime.optimize.local.fetch", "true" );
807    }
808
809  private static boolean interfaceAssignableFromClassName( Class<?> xface, String className )
810    {
811    if( ( className == null ) || ( xface == null ) )
812      return false;
813
814    try
815      {
816      Class<?> klass = Class.forName( className );
817      if( klass == null )
818        return false;
819
820      if( !xface.isAssignableFrom( klass ) )
821        return false;
822
823      return true;
824      }
825    catch( ClassNotFoundException cnfe )
826      {
827      return false; // let downstream figure it out
828      }
829    }
830
831  public static boolean setNewApi( Configuration conf, String className )
832    {
833    if( className == null ) // silently return and let the error be caught downstream
834      return false;
835
836    boolean isStable = className.startsWith( "org.apache.hadoop.mapred." )
837      || interfaceAssignableFromClassName( org.apache.hadoop.mapred.InputFormat.class, className );
838
839    boolean isNew = className.startsWith( "org.apache.hadoop.mapreduce." )
840      || interfaceAssignableFromClassName( org.apache.hadoop.mapreduce.InputFormat.class, className );
841
842    if( isStable )
843      conf.setBoolean( "mapred.mapper.new-api", false );
844    else if( isNew )
845      conf.setBoolean( "mapred.mapper.new-api", true );
846    else
847      throw new IllegalStateException( "cannot determine if class denotes stable or new api, please set 'mapred.mapper.new-api' to the appropriate value" );
848
849    return true;
850    }
851
852  public static void addInputPaths( Configuration conf, Iterable<Path> paths )
853    {
854    Path workingDirectory = getWorkingDirectory( conf );
855    String dirs = conf.get( "mapred.input.dir" );
856    StringBuilder buffer = new StringBuilder( dirs == null ? "" : dirs );
857
858    for( Path path : paths )
859      {
860      if( !path.isAbsolute() )
861        path = new Path( workingDirectory, path );
862
863      String dirStr = StringUtils.escapeString( path.toString() );
864
865      if( buffer.length() != 0 )
866        buffer.append( ',' );
867
868      buffer.append( dirStr );
869      }
870
871    conf.set( "mapred.input.dir", buffer.toString() );
872    }
873
874  public static void addInputPath( Configuration conf, Path path )
875    {
876    Path workingDirectory = getWorkingDirectory( conf );
877    path = new Path( workingDirectory, path );
878    String dirStr = StringUtils.escapeString( path.toString() );
879    String dirs = conf.get( "mapred.input.dir" );
880    conf.set( "mapred.input.dir", dirs == null ? dirStr :
881      dirs + StringUtils.COMMA_STR + dirStr );
882    }
883
884  public static void setOutputPath( Configuration conf, Path path )
885    {
886    Path workingDirectory = getWorkingDirectory( conf );
887    path = new Path( workingDirectory, path );
888    conf.set( "mapred.output.dir", path.toString() );
889    }
890
891  private static Path getWorkingDirectory( Configuration conf )
892    {
893    String name = conf.get( "mapred.working.dir" );
894    if( name != null )
895      {
896      return new Path( name );
897      }
898    else
899      {
900      try
901        {
902        Path dir = FileSystem.get( conf ).getWorkingDirectory();
903        conf.set( "mapred.working.dir", dir.toString() );
904        return dir;
905        }
906      catch( IOException e )
907        {
908        throw new RuntimeException( e );
909        }
910      }
911    }
912
913  public static Path getOutputPath( Configuration conf )
914    {
915    String name = conf.get( "mapred.output.dir" );
916    return name == null ? null : new Path( name );
917    }
918
919  public static String pack( Object object, Configuration conf )
920    {
921    if( object == null )
922      return "";
923
924    try
925      {
926      return serializeBase64( object, conf, true );
927      }
928    catch( IOException exception )
929      {
930      throw new FlowException( "unable to pack object: " + object.getClass().getCanonicalName(), exception );
931      }
932    }
933
934  public static void addFields( Configuration conf, String property, Map<Integer, Fields> fields )
935    {
936    if( fields == null || fields.isEmpty() )
937      return;
938
939    Map<String, Fields> toPack = new HashMap<>();
940
941    for( Map.Entry<Integer, Fields> entry : fields.entrySet() )
942      toPack.put( entry.getKey().toString(), entry.getValue() );
943
944    conf.set( property, pack( toPack, conf ) );
945    }
946
947  public static Map<Integer, Fields> getFields( Configuration conf, String property ) throws IOException
948    {
949    String value = conf.getRaw( property );
950
951    if( value == null || value.isEmpty() )
952      return Collections.emptyMap();
953
954    Map<String, Fields> map = deserializeBase64( value, conf, Map.class, true );
955    Map<Integer, Fields> result = new HashMap<>();
956
957    for( Map.Entry<String, Fields> entry : map.entrySet() )
958      result.put( Integer.parseInt( entry.getKey() ), entry.getValue() );
959
960    return result;
961    }
962
963  public static void addComparators( Configuration conf, String property, Map<String, Fields> map, BaseFlowStep flowStep, Group group )
964    {
965    Iterator<Fields> fieldsIterator = map.values().iterator();
966
967    if( !fieldsIterator.hasNext() )
968      return;
969
970    Fields fields = fieldsIterator.next();
971
972    if( fields.hasComparators() )
973      {
974      conf.set( property, pack( fields, conf ) );
975      return;
976      }
977
978    // use resolved fields if there are no comparators.
979    Set<Scope> previousScopes = flowStep.getPreviousScopes( group );
980
981    fields = previousScopes.iterator().next().getOutValuesFields();
982
983    if( fields.size() != 0 ) // allows fields.UNKNOWN to be used
984      conf.setInt( property + ".size", fields.size() );
985    }
986
987  public static void addComparators( Configuration conf, String property, Map<String, Fields> map, Fields resolvedFields )
988    {
989    Iterator<Fields> fieldsIterator = map.values().iterator();
990
991    if( !fieldsIterator.hasNext() )
992      return;
993
994    while( fieldsIterator.hasNext() )
995      {
996      Fields fields = fieldsIterator.next();
997
998      if( fields.hasComparators() )
999        {
1000        conf.set( property, pack( fields, conf ) );
1001        return;
1002        }
1003      }
1004
1005    if( resolvedFields.size() != 0 ) // allows fields.UNKNOWN to be used
1006      conf.setInt( property + ".size", resolvedFields.size() );
1007    }
1008  }