001/*
002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
003 *
004 * Project and contact information: https://cascading.wensel.net/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.tuple.hadoop;
022
023import java.io.DataInputStream;
024import java.io.DataOutputStream;
025import java.io.IOException;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.Collections;
029import java.util.Comparator;
030import java.util.HashMap;
031import java.util.LinkedList;
032import java.util.List;
033import java.util.Map;
034
035import cascading.CascadingException;
036import cascading.flow.FlowProcess;
037import cascading.flow.FlowProps;
038import cascading.flow.hadoop.util.HadoopUtil;
039import cascading.tuple.Comparison;
040import cascading.tuple.Fields;
041import cascading.tuple.Tuple;
042import cascading.tuple.TupleException;
043import cascading.tuple.hadoop.io.HadoopTupleOutputStream;
044import cascading.tuple.hadoop.io.IndexTupleDeserializer;
045import cascading.tuple.hadoop.io.IndexTupleSerializer;
046import cascading.tuple.hadoop.io.KeyIndexTupleDeserializer;
047import cascading.tuple.hadoop.io.KeyIndexTupleSerializer;
048import cascading.tuple.hadoop.io.KeyTupleDeserializer;
049import cascading.tuple.hadoop.io.KeyTupleSerializer;
050import cascading.tuple.hadoop.io.TupleDeserializer;
051import cascading.tuple.hadoop.io.TuplePairDeserializer;
052import cascading.tuple.hadoop.io.TuplePairSerializer;
053import cascading.tuple.hadoop.io.TupleSerializer;
054import cascading.tuple.hadoop.io.ValueIndexTupleDeserializer;
055import cascading.tuple.hadoop.io.ValueIndexTupleSerializer;
056import cascading.tuple.hadoop.io.ValueTupleDeserializer;
057import cascading.tuple.hadoop.io.ValueTupleSerializer;
058import cascading.tuple.io.IndexTuple;
059import cascading.tuple.io.KeyIndexTuple;
060import cascading.tuple.io.KeyTuple;
061import cascading.tuple.io.TupleInputStream;
062import cascading.tuple.io.TupleOutputStream;
063import cascading.tuple.io.TuplePair;
064import cascading.tuple.io.ValueIndexTuple;
065import cascading.tuple.io.ValueTuple;
066import cascading.util.Util;
067import org.apache.hadoop.conf.Configuration;
068import org.apache.hadoop.conf.Configured;
069import org.apache.hadoop.io.WritableUtils;
070import org.apache.hadoop.io.serializer.Deserializer;
071import org.apache.hadoop.io.serializer.Serialization;
072import org.apache.hadoop.io.serializer.SerializationFactory;
073import org.apache.hadoop.io.serializer.Serializer;
074import org.apache.hadoop.io.serializer.WritableSerialization;
075import org.apache.hadoop.util.ReflectionUtils;
076import org.slf4j.Logger;
077import org.slf4j.LoggerFactory;
078
079import static cascading.tuple.hadoop.TupleSerializationProps.HADOOP_IO_SERIALIZATIONS;
080
081/**
082 * Class TupleSerialization is an implementation of Hadoop's {@link Serialization} interface.
083 * <p>
084 * Typically developers will not use this implementation directly as it is automatically added
085 * to any relevant MapReduce jobs via the {@link org.apache.hadoop.conf.Configuration}.
086 * <p>
087 * By default, all primitive types are natively handled, and {@link org.apache.hadoop.io.BytesWritable}
088 * has a pre-configured serialization token since byte arrays are not handled natively by {@link Tuple}.
089 * <p>
090 * To add or manipulate Hadoop serializations or Cascading serializations tokens, see
091 * {@link TupleSerializationProps} for a fluent property builder class.
092 * <p>
 * By default this Serialization interface registers the class {@link org.apache.hadoop.io.BytesWritable} as
 * token 127.
095 */
096@SerializationToken(
097  tokens = {127},
098  classNames = {"org.apache.hadoop.io.BytesWritable"})
099public class TupleSerialization extends Configured implements Serialization
100  {
101
102  /** Field LOG */
103  private static final Logger LOG = LoggerFactory.getLogger( TupleSerialization.class );
104
  /** Field defaultComparator */
106  private Comparator defaultComparator;
107  /** Field classCache */
108  private final Map<String, Class> classCache = new HashMap<String, Class>();
109  /** Field serializationFactory */
110  private SerializationFactory serializationFactory;
111
112  /** Field tokenClassesMap */
113  private HashMap<Integer, String> tokenClassesMap;
114  /** Field classesTokensMap */
115  private HashMap<String, Integer> classesTokensMap;
116  /** Field tokenMapSize */
117  private long tokensSize = 0;
118
119  List<Integer> ordinals;
120
121  Map<Integer, Fields> keyFieldsMap;
122  Map<Integer, Fields> sortFieldsMap;
123  Map<Integer, Fields> valueFieldsMap;
124
125  Fields keyFields;
126  Fields sortFields;
127  Fields valueFields;
128
129  Boolean typesRequired; // for testing purposes
130  Boolean typesIgnored; // for testing purposes
131
132  static String getSerializationTokens( Configuration jobConf )
133    {
134    return jobConf.get( TupleSerializationProps.SERIALIZATION_TOKENS );
135    }
136
137  /**
138   * Adds this class as a Hadoop Serialization class. This method is safe to call redundantly.
139   * <p>
140   * This method will guarantee  and {@link WritableSerialization} are
141   * first in the list, as both are required.
142   *
143   * @param jobConf of type JobConf
144   */
145  public static void setSerializations( Configuration jobConf )
146    {
147    setSerializations( jobConf, Collections.emptySet() );
148    }
149
  /**
   * Adds this class and {@link WritableSerialization} to the serializations property, preserving
   * any previously configured and provided serializations after them. Safe to call redundantly.
   *
   * @param jobConf  of type Configuration
   * @param provided additional serialization class names to retain in the list
   */
  public static void setSerializations( Configuration jobConf, Collection<String> provided )
    {
    String serializations = getSerializations( jobConf );

    LinkedList<String> list = new LinkedList<String>();

    list.addAll( provided );

    if( serializations != null && !serializations.isEmpty() )
      Collections.addAll( list, serializations.split( "," ) );

    // required by MultiInputSplit
    String writable = WritableSerialization.class.getName();
    String tuple = TupleSerialization.class.getName();

    // remove any existing occurrences so the addFirst calls below cannot duplicate them
    list.remove( writable );
    list.remove( tuple );

    list.addFirst( writable );
    list.addFirst( tuple );

    // tuple serialization ends up first, writable serialization second
    jobConf.set( HADOOP_IO_SERIALIZATIONS, Util.join( list, "," ) );
    }
174
175  static String getSerializations( Configuration jobConf )
176    {
177    return jobConf.get( HADOOP_IO_SERIALIZATIONS, null );
178    }
179
180  public static Comparator getDefaultComparator( Comparator comparator, Configuration jobConf )
181    {
182    String typeName = jobConf.get( FlowProps.DEFAULT_ELEMENT_COMPARATOR );
183
184    if( Util.isEmpty( typeName ) )
185      return null;
186
187    if( comparator == null )
188      return createComparator( jobConf, typeName );
189
190    if( comparator.getClass().getName().equals( typeName ) && !( comparator instanceof Configured ) )
191      return comparator;
192
193    return createComparator( jobConf, typeName );
194    }
195
196  public static Comparator getDefaultComparator( Configuration jobConf )
197    {
198    String typeName = jobConf.get( FlowProps.DEFAULT_ELEMENT_COMPARATOR );
199
200    if( Util.isEmpty( typeName ) )
201      return null;
202
203    return createComparator( jobConf, typeName );
204    }
205
206  private static Comparator createComparator( Configuration jobConf, String typeName )
207    {
208    LOG.debug( "using default comparator: {}", typeName );
209
210    try
211      {
212      Class<Comparator> type = (Class<Comparator>) TupleSerialization.class.getClassLoader().loadClass( typeName );
213
214      return ReflectionUtils.newInstance( type, jobConf );
215      }
216    catch( ClassNotFoundException exception )
217      {
218      throw new CascadingException( "unable to load class: " + typeName, exception );
219      }
220    }
221
  /**
   * Constructor TupleSerialization creates a new TupleSerialization instance.
   * Used by Hadoop via reflection; configuration is supplied later through {@link #setConf}.
   */
  public TupleSerialization()
    {
    }
226
  /**
   * Constructor TupleSerialization creates a new TupleSerialization instance that resolves
   * configuration values through the given FlowProcess.
   *
   * @param flowProcess of type FlowProcess, supplies property values on demand
   */
  public TupleSerialization( final FlowProcess<? extends Configuration> flowProcess )
    {
    // adapt the FlowProcess to the Configuration interface; only the two get(...) lookups
    // are overridden, all other Configuration methods see an empty default configuration
    super( new Configuration()
      {
      @Override
      public String get( String name )
        {
        return get( name, null );
        }

      @Override
      public String get( String name, String defaultValue )
        {
        Object value = flowProcess.getProperty( name );
        return value == null ? defaultValue : String.valueOf( value );
        }
      } );
    }
245
246  /**
247   * Constructor TupleSerialization creates a new TupleSerialization instance.
248   *
249   * @param conf of type Configuration
250   */
251  public TupleSerialization( Configuration conf )
252    {
253    super( conf );
254    }
255
256  @Override
257  public void setConf( Configuration conf )
258    {
259    super.setConf( conf );
260
261    if( conf != null )
262      defaultComparator = getDefaultComparator( conf );
263    }
264
265  @Override
266  public Configuration getConf()
267    {
268    if( super.getConf() == null )
269      setConf( new Configuration() );
270
271    return super.getConf();
272    }
273
274  public boolean areTypesIgnored()
275    {
276    if( typesIgnored == null )
277      {
278      typesIgnored = getConf().getBoolean( TupleSerializationProps.IGNORE_TYPES, false );
279
280      if( typesIgnored )
281        LOG.info( "types are being ignored during serialization" );
282      }
283
284    return typesIgnored;
285    }
286
287  public boolean areTypesRequired()
288    {
289    if( typesRequired == null )
290      {
291      typesRequired = getConf().getBoolean( TupleSerializationProps.REQUIRE_TYPES, false );
292
293      if( typesRequired )
294        LOG.info( "types are being enforced during serialization" );
295      }
296
297    return typesRequired;
298    }
299
300  SerializationFactory getSerializationFactory()
301    {
302    if( serializationFactory == null )
303      serializationFactory = new SerializationFactory( getConf() );
304
305    return serializationFactory;
306    }
307
308  public Fields getKeyFields()
309    {
310    if( keyFields == null && getFirstOrdinal() != null )
311      keyFields = getKeyFieldsMap().get( getFirstOrdinal() );
312
313    return keyFields;
314    }
315
316  public Class[] getKeyTypes()
317    {
318    Fields fields = getKeyFields();
319
320    return getTypesFor( fields );
321    }
322
323  public Class[] getTypesFor( Fields fields )
324    {
325    if( areTypesIgnored() || fields == null )
326      return null;
327
328    return fields.getTypesClasses();
329    }
330
331  public Fields getSortFields()
332    {
333    if( sortFields == null && getFirstOrdinal() != null )
334      sortFields = getSortFieldsMap().get( getFirstOrdinal() );
335
336    return sortFields;
337    }
338
339  public Class[] getSortTypes()
340    {
341    return getTypesFor( getSortFields() );
342    }
343
344  public Fields getValueFields()
345    {
346    if( valueFields == null && getFirstOrdinal() != null )
347      valueFields = getValueFieldsMap().get( getFirstOrdinal() );
348
349    return valueFields;
350    }
351
352  public Fields getMaskedValueFields()
353    {
354    return maskVoid( getValueFields(), getKeyFields() );
355    }
356
357  public Class[] getValueTypes()
358    {
359    return getTypesFor( getValueFields() );
360    }
361
362  public Map<Integer, Class[]> getKeyTypeMap()
363    {
364    if( areTypesIgnored() || getKeyFieldsMap() == null )
365      return Collections.emptyMap();
366
367    Map<Integer, Class[]> map = new HashMap<>();
368
369    for( Map.Entry<Integer, Fields> entry : getKeyFieldsMap().entrySet() )
370      map.put( entry.getKey(), entry.getValue().getTypesClasses() );
371
372    return map;
373    }
374
375  public Map<Integer, Class[]> getValueTypeMap()
376    {
377    if( areTypesIgnored() || getValueFieldsMap() == null )
378      return Collections.emptyMap();
379
380    Map<Integer, Class[]> map = new HashMap<>();
381
382    for( Map.Entry<Integer, Fields> entry : getValueFieldsMap().entrySet() )
383      map.put( entry.getKey(), entry.getValue().getTypesClasses() );
384
385    return map;
386    }
387
388  public Map<Integer, Class[]> getMaskedValueTypeMap()
389    {
390    if( areTypesIgnored() || getValueFieldsMap() == null )
391      return Collections.emptyMap();
392
393    Map<Integer, Fields> keyFieldsMap = getKeyFieldsMap();
394
395    if( keyFieldsMap == null || keyFieldsMap.isEmpty() )
396      return getValueTypeMap();
397
398    Map<Integer, Class[]> map = new HashMap<>();
399
400    for( Map.Entry<Integer, Fields> entry : getValueFieldsMap().entrySet() )
401      {
402      Integer ordinal = entry.getKey();
403      Fields valueFields = entry.getValue();
404      Fields keyFields = keyFieldsMap.get( ordinal );
405
406      map.put( ordinal, maskVoid( valueFields, keyFields ).getTypesClasses() );
407      }
408
409    return map;
410    }
411
412  public List<Integer> getOrdinals()
413    {
414    if( ordinals == null )
415      ordinals = Util.split( Integer.class, ",", getConf().get( "cascading.node.ordinals" ) );
416
417    return ordinals;
418    }
419
420  public Integer getFirstOrdinal()
421    {
422    if( getOrdinals().isEmpty() )
423      return null;
424
425    return Util.getFirst( getOrdinals() );
426    }
427
428  public Map<Integer, Fields> getKeyFieldsMap()
429    {
430    if( keyFieldsMap == null )
431      keyFieldsMap = getFields( getConf(), "cascading.node.key.fields" );
432
433    return keyFieldsMap;
434    }
435
436  public Map<Integer, Fields> getSortFieldsMap()
437    {
438    if( sortFields == null )
439      sortFieldsMap = getFields( getConf(), "cascading.node.sort.fields" );
440
441    return sortFieldsMap;
442    }
443
444  public Map<Integer, Fields> getValueFieldsMap()
445    {
446    if( valueFieldsMap == null )
447      valueFieldsMap = getFields( getConf(), "cascading.node.value.fields" );
448
449    return valueFieldsMap;
450    }
451
452  /** Must be called before {@link #getClassNameFor(int)} and {@link #getTokenFor(String)} methods. */
453  void initTokenMaps()
454    {
455    if( tokenClassesMap != null )
456      return;
457
458    tokenClassesMap = new HashMap<>();
459    classesTokensMap = new HashMap<>();
460
461    String tokenProperty = getSerializationTokens( getConf() );
462
463    if( tokenProperty != null )
464      {
465      tokenProperty = tokenProperty.replaceAll( "\\s", "" ); // allow for whitespace in token set
466
467      for( String pair : tokenProperty.split( "," ) )
468        {
469        String[] elements = pair.split( "=" );
470        addToken( null, Integer.parseInt( elements[ 0 ] ), elements[ 1 ] );
471        }
472      }
473
474    String serializationsString = getSerializations( getConf() );
475
476    LOG.debug( "using hadoop serializations from the job conf: {} ", serializationsString );
477
478    if( serializationsString == null )
479      return;
480
481    String[] serializations = serializationsString.split( "," );
482
483    for( String serializationName : serializations )
484      {
485      try
486        {
487        Class type = getConf().getClassByName( serializationName );
488
489        SerializationToken tokenAnnotation = (SerializationToken) type.getAnnotation( SerializationToken.class );
490
491        if( tokenAnnotation == null )
492          continue;
493
494        if( tokenAnnotation.tokens().length != tokenAnnotation.classNames().length )
495          throw new CascadingException( "serialization annotation tokens and classNames must be the same length" );
496
497        int[] tokens = tokenAnnotation.tokens();
498
499        for( int i = 0; i < tokens.length; i++ )
500          addToken( type, tokens[ i ], tokenAnnotation.classNames()[ i ] );
501        }
502      catch( ClassNotFoundException exception )
503        {
504        LOG.warn( "unable to load serialization class: {}", serializationName, exception );
505        }
506      }
507
508    tokensSize = tokenClassesMap.size();
509    }
510
  /**
   * Registers a token/classname pair in both lookup maps.
   *
   * @param type      the serialization class declaring the token, or null when the token came
   *                  from the tokens property
   * @param token     the numeric token; non-cascading serializations must use 128 or greater
   * @param className the class the token stands for
   * @throws CascadingException    when a non-cascading serialization declares a token below 128
   * @throws IllegalStateException when the token or classname was already registered; the
   *                               message identifies whether the duplicate came from properties
   *                               or from a serialization annotation
   */
  private void addToken( Class type, int token, String className )
    {
    // tokens below 128 are reserved for cascading-internal serializations
    if( type != null && !type.getName().startsWith( "cascading." ) && token < 128 )
      throw new CascadingException( "serialization annotation tokens may not be less than 128, was: " + token );

    if( tokenClassesMap.containsKey( token ) )
      {
      if( type == null )
        throw new IllegalStateException( "duplicate serialization token: " + token + " for class: " + className + " found in properties" );

      throw new IllegalStateException( "duplicate serialization token: " + token + " for class: " + className + " on serialization: " + type.getName() );
      }

    if( classesTokensMap.containsKey( className ) )
      {
      if( type == null )
        throw new IllegalStateException( "duplicate serialization classname: " + className + " for token: " + token + " found in properties " );

      throw new IllegalStateException( "duplicate serialization classname: " + className + " for token: " + token + " on serialization: " + type.getName() );
      }

    LOG.debug( "adding serialization token: {}, for classname: {}", token, className );

    tokenClassesMap.put( token, className );
    classesTokensMap.put( className, token );
    }
537
538  /**
539   * Returns the className for the given token.
540   *
541   * @param token of type int
542   * @return a String
543   */
544  final String getClassNameFor( int token )
545    {
546    initTokenMaps();
547
548    if( tokensSize == 0 )
549      return null;
550
551    return tokenClassesMap.get( token );
552    }
553
  /** Returns the number of registered tokens; zero until {@link #initTokenMaps()} has run. */
  final long getTokensMapSize()
    {
    return tokensSize;
    }
558
559  /**
560   * Returns the token for the given className.
561   *
562   * @param className of type String
563   * @return an Integer
564   */
565  final Integer getTokenFor( String className )
566    {
567    initTokenMaps();
568
569    if( tokensSize == 0 )
570      return null;
571
572    return classesTokensMap.get( className );
573    }
574
  /** Returns the configured default element comparator, or null when none was configured. */
  public Comparator getDefaultComparator()
    {
    return defaultComparator;
    }
579
580  public Comparator getComparator( Class type )
581    {
582    Serialization serialization = getSerialization( type );
583
584    Comparator comparator = null;
585
586    if( serialization instanceof Comparison )
587      comparator = ( (Comparison) serialization ).getComparator( type );
588
589    if( comparator != null )
590      return comparator;
591
592    return defaultComparator;
593    }
594
  /** Returns the Serialization registered for the named class, loading the class first. */
  Serialization getSerialization( String className )
    {
    return getSerialization( getClass( className ) );
    }
599
  /** Returns the Serialization registered for the given type via the Hadoop factory. */
  Serialization getSerialization( Class type )
    {
    return getSerializationFactory().getSerialization( type );
    }
604
605  Serializer getNewSerializer( Class type )
606    {
607    try
608      {
609      Serializer serializer = getSerializationFactory().getSerializer( type );
610
611      if( serializer == null )
612        throw new CascadingException( "unable to load serializer for: " + type.getName() + " from: " + getSerializationFactory().getClass().getName() );
613
614      return serializer;
615      }
616    catch( NullPointerException exception )
617      {
618      throw new CascadingException( "unable to load serializer for: " + type.getName() + " from: " + getSerializationFactory().getClass().getName() );
619      }
620    }
621
622  Deserializer getNewDeserializer( String className )
623    {
624    try
625      {
626      Deserializer deserializer = getSerializationFactory().getDeserializer( getClass( className ) );
627
628      if( deserializer == null )
629        throw new CascadingException( "unable to load deserializer for: " + className + " from: " + getSerializationFactory().getClass().getName() );
630
631      return deserializer;
632      }
633    catch( NullPointerException exception )
634      {
635      throw new CascadingException( "unable to load deserializer for: " + className + " from: " + getSerializationFactory().getClass().getName() );
636      }
637    }
638
639  KeyTupleDeserializer getKeyTupleDeserializer()
640    {
641    return new KeyTupleDeserializer( getElementReader() );
642    }
643
644  ValueTupleDeserializer getValueTupleDeserializer()
645    {
646    return new ValueTupleDeserializer( getElementReader() );
647    }
648
649  TuplePairDeserializer getTuplePairDeserializer()
650    {
651    return new TuplePairDeserializer( getElementReader() );
652    }
653
654  /**
655   * Method getElementReader returns the elementReader of this TupleSerialization object.
656   *
657   * @return the elementReader (type SerializationElementReader) of this TupleSerialization object.
658   */
659  public SerializationElementReader getElementReader()
660    {
661    return new SerializationElementReader( this );
662    }
663
664  TupleDeserializer getTupleDeserializer()
665    {
666    return new TupleDeserializer( getElementReader() );
667    }
668
669  private KeyTupleSerializer getKeyTupleSerializer()
670    {
671    return new KeyTupleSerializer( getElementWriter() );
672    }
673
674  private ValueTupleSerializer getValueTupleSerializer()
675    {
676    return new ValueTupleSerializer( getElementWriter() );
677    }
678
679  private TuplePairSerializer getTuplePairSerializer()
680    {
681    return new TuplePairSerializer( getElementWriter() );
682    }
683
684  KeyIndexTupleDeserializer getKeyIndexTupleDeserializer()
685    {
686    return new KeyIndexTupleDeserializer( getElementReader() );
687    }
688
689  ValueIndexTupleDeserializer getValueIndexTupleDeserializer()
690    {
691    return new ValueIndexTupleDeserializer( getElementReader() );
692    }
693
694  IndexTupleDeserializer getIndexTupleDeserializer()
695    {
696    return new IndexTupleDeserializer( getElementReader() );
697    }
698
699  /**
700   * Method getElementWriter returns the elementWriter of this TupleSerialization object.
701   *
702   * @return the elementWriter (type SerializationElementWriter) of this TupleSerialization object.
703   */
704  public SerializationElementWriter getElementWriter()
705    {
706    return new SerializationElementWriter( this );
707    }
708
709  private TupleSerializer getTupleSerializer()
710    {
711    return new TupleSerializer( getElementWriter() );
712    }
713
714  private KeyIndexTupleSerializer getKeyIndexTupleSerializer()
715    {
716    return new KeyIndexTupleSerializer( getElementWriter() );
717    }
718
719  private ValueIndexTupleSerializer getValueIndexTupleSerializer()
720    {
721    return new ValueIndexTupleSerializer( getElementWriter() );
722    }
723
724  private IndexTupleSerializer getIndexTupleSerializer()
725    {
726    return new IndexTupleSerializer( getElementWriter() );
727    }
728
729  public boolean accept( Class c )
730    {
731    return Tuple.class == c ||
732      KeyTuple.class == c || ValueTuple.class == c ||
733      KeyIndexTuple.class == c || ValueIndexTuple.class == c ||
734      TuplePair.class == c || IndexTuple.class == c;
735    }
736
737  public Deserializer getDeserializer( Class c )
738    {
739    if( c == Tuple.class )
740      return getTupleDeserializer();
741    else if( c == KeyTuple.class )
742      return getKeyTupleDeserializer();
743    else if( c == ValueTuple.class )
744      return getValueTupleDeserializer();
745    else if( c == KeyIndexTuple.class )
746      return getKeyIndexTupleDeserializer();
747    else if( c == ValueIndexTuple.class )
748      return getValueIndexTupleDeserializer();
749    else if( c == TuplePair.class )
750      return getTuplePairDeserializer();
751    else if( c == IndexTuple.class )
752      return getIndexTupleDeserializer();
753
754    throw new IllegalArgumentException( "unknown class, cannot deserialize: " + c.getName() );
755    }
756
757  public Serializer getSerializer( Class c )
758    {
759    if( c == Tuple.class )
760      return getTupleSerializer();
761    else if( c == KeyTuple.class )
762      return getKeyTupleSerializer();
763    else if( c == ValueTuple.class )
764      return getValueTupleSerializer();
765    else if( c == KeyIndexTuple.class )
766      return getKeyIndexTupleSerializer();
767    else if( c == ValueIndexTuple.class )
768      return getValueIndexTupleSerializer();
769    else if( c == TuplePair.class )
770      return getTuplePairSerializer();
771    else if( c == IndexTuple.class )
772      return getIndexTupleSerializer();
773
774    throw new IllegalArgumentException( "unknown class, cannot serialize: " + c.getName() );
775    }
776
777  public Class getClass( String className )
778    {
779    Class type = classCache.get( className );
780
781    if( type != null )
782      return type;
783
784    try
785      {
786      if( className.charAt( 0 ) == '[' )
787        type = Class.forName( className, true, Thread.currentThread().getContextClassLoader() );
788      else
789        type = Thread.currentThread().getContextClassLoader().loadClass( className );
790      }
791    catch( ClassNotFoundException exception )
792      {
793      throw new TupleException( "unable to load class named: " + className, exception );
794      }
795
796    classCache.put( className, type );
797
798    return type;
799    }
800
801  public static Map<Integer, Fields> getFields( Configuration conf, String property )
802    {
803    try
804      {
805      return HadoopUtil.getFields( conf, property );
806      }
807    catch( IOException exception )
808      {
809      LOG.warn( "unable to get fields for: " + property );
810
811      return Collections.emptyMap();
812      }
813    }
814
815  private static Fields maskVoid( Fields fields, Fields mask )
816    {
817    if( fields == null )
818      return null;
819
820    if( mask == null || !fields.hasTypes() || !mask.hasTypes() )
821      return fields;
822
823    Fields voidedKey = mask.applyTypes( Fields.size( mask.size(), Void.class ) );
824
825    fields = fields.applyTypes( voidedKey );
826
827    return fields;
828    }
829
  /**
   * SerializationElementReader reads individual tuple elements from a stream, resolving
   * serialization tokens to class names and caching one open {@link Deserializer} per class name.
   */
  public static class SerializationElementReader implements TupleInputStream.ElementReader
    {
    /** Field LOG */
    private static final Logger LOG = LoggerFactory.getLogger( SerializationElementReader.class );

    /** Field tupleSerialization */
    private final TupleSerialization tupleSerialization;

    /** Field deserializers - one open Deserializer cached per class name */
    final Map<String, Deserializer> deserializers = new HashMap<String, Deserializer>();

    /**
     * Constructor SerializationElementReader creates a new SerializationElementReader instance.
     *
     * @param tupleSerialization of type TupleSerialization
     */
    public SerializationElementReader( TupleSerialization tupleSerialization )
      {
      this.tupleSerialization = tupleSerialization;
      }

    public TupleSerialization getTupleSerialization()
      {
      return tupleSerialization;
      }

    /**
     * Reads the next element for the given token, resolving the token (or an inline classname)
     * to a deserializer. Logs and rethrows any IOException from deserialization.
     */
    public Object read( int token, DataInputStream inputStream ) throws IOException
      {
      String className = getClassNameFor( token, inputStream );
      Deserializer deserializer = getDeserializerFor( inputStream, className );

      Object foundObject = null; // null prompts the deserializer to allocate a new object
      Object object;

      try
        {
        object = deserializer.deserialize( foundObject );
        }
      catch( IOException exception )
        {
        LOG.error( "failed deserializing token: " + token + " with classname: " + className, exception );

        throw exception;
        }

      return object;
      }

    /**
     * Reads the next element as the given concrete type, bypassing token resolution.
     * Logs and rethrows any IOException from deserialization.
     */
    public Object read( Class type, DataInputStream inputStream ) throws IOException
      {
      String className = type.getName();
      Deserializer deserializer = getDeserializerFor( inputStream, className );

      Object foundObject = null; // null prompts the deserializer to allocate a new object
      Object object;

      try
        {
        object = deserializer.deserialize( foundObject );
        }
      catch( IOException exception )
        {
        LOG.error( "failed deserializing: " + className, exception );

        throw exception;
        }

      return object;
      }

    @Override
    public Comparator getComparatorFor( int token, DataInputStream inputStream ) throws IOException
      {
      Class type = tupleSerialization.getClass( getClassNameFor( token, inputStream ) );

      return tupleSerialization.getComparator( type );
      }

    // returns the cached deserializer for the class name, opening a new one on first use
    private Deserializer getDeserializerFor( DataInputStream inputStream, String className ) throws IOException
      {
      Deserializer deserializer = deserializers.get( className );

      if( deserializer == null )
        {
        deserializer = tupleSerialization.getNewDeserializer( className );
        deserializer.open( inputStream );
        deserializers.put( className, deserializer );
        }

      return deserializer;
      }

    /**
     * Resolves a token to a class name; when the token is unknown, the classname is expected
     * to have been written inline on the stream and is read from there.
     */
    public String getClassNameFor( int token, DataInputStream inputStream ) throws IOException
      {
      String className = tupleSerialization.getClassNameFor( token );

      try
        {
        if( className == null )
          className = WritableUtils.readString( inputStream );
        }
      catch( IOException exception )
        {
        LOG.error( "unable to resolve token: {}, to a valid classname, with token map of size: {}, rethrowing IOException", token, tupleSerialization.getTokensMapSize() );
        throw exception;
        }

      return className;
      }

    /** Closes all cached deserializers, ignoring close failures. */
    public void close()
      {
      if( deserializers.size() == 0 )
        return;

      // copy then clear so the cache is empty even if a close() throws
      Collection<Deserializer> clone = new ArrayList<Deserializer>( deserializers.values() );

      deserializers.clear();

      for( Deserializer deserializer : clone )
        {
        try
          {
          deserializer.close();
          }
        catch( IOException exception )
          {
          // do nothing - best effort cleanup
          }
        }
      }
    }
962
  /**
   * SerializationElementWriter writes individual tuple elements to a stream, emitting a
   * serialization token (or the inline classname when no token is registered) before each
   * element, and caching one open {@link Serializer} per class.
   */
  public static class SerializationElementWriter implements TupleOutputStream.ElementWriter
    {
    /** Field LOG */
    private static final Logger LOG = LoggerFactory.getLogger( SerializationElementWriter.class );

    /** Field tupleSerialization */
    private final TupleSerialization tupleSerialization;

    /** Field serializers - one open Serializer cached per class */
    final Map<Class, Serializer> serializers = new HashMap<Class, Serializer>();

    public SerializationElementWriter( TupleSerialization tupleSerialization )
      {
      this.tupleSerialization = tupleSerialization;
      }

    public TupleSerialization getTupleSerialization()
      {
      return tupleSerialization;
      }

    /**
     * Writes the object's token (or WRITABLE_TOKEN plus its classname when untokenized)
     * followed by the serialized object. Logs and rethrows any IOException.
     */
    public void write( DataOutputStream outputStream, Object object ) throws IOException
      {
      Class<?> type = object.getClass();
      String className = type.getName();
      Integer token = tupleSerialization.getTokenFor( className );

      if( token == null )
        {
        LOG.debug( "no serialization token found for classname: {}", className );

        WritableUtils.writeVInt( outputStream, HadoopTupleOutputStream.WRITABLE_TOKEN ); // denotes to punt to hadoop serialization
        WritableUtils.writeString( outputStream, className );
        }
      else
        {
        WritableUtils.writeVInt( outputStream, token );
        }

      Serializer serializer = getSerializer( outputStream, type );

      try
        {
        serializer.serialize( object );
        }
      catch( IOException exception )
        {
        LOG.error( "failed serializing token: " + token + " with classname: " + className, exception );

        throw exception;
        }
      }

    // returns the cached serializer for the type, opening a new one on first use
    private Serializer getSerializer( DataOutputStream outputStream, Class<?> type ) throws IOException
      {
      Serializer serializer = serializers.get( type );

      if( serializer == null )
        {
        serializer = tupleSerialization.getNewSerializer( type );
        serializer.open( outputStream );
        serializers.put( type, serializer );
        }

      return serializer;
      }

    /**
     * Writes the object as the given concrete type without emitting a token; used when the
     * reader side already knows the type. Logs and rethrows any IOException.
     */
    public void write( DataOutputStream outputStream, Class<?> type, Object object ) throws IOException
      {
      Serializer serializer = getSerializer( outputStream, type );

      try
        {
        serializer.serialize( object );
        }
      catch( IOException exception )
        {
        LOG.error( "failed serializing type: " + type.getName(), exception );

        throw exception;
        }
      }

    /** Closes all cached serializers, ignoring close failures. */
    public void close()
      {
      if( serializers.size() == 0 )
        return;

      // copy then clear so the cache is empty even if a close() throws
      Collection<Serializer> clone = new ArrayList<Serializer>( serializers.values() );

      serializers.clear();

      for( Serializer serializer : clone )
        {
        try
          {
          serializer.close();
          }
        catch( IOException exception )
          {
          // do nothing - best effort cleanup
          }
        }
      }
    }
1068  }