001/*
002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
003 *
004 * Project and contact information: https://cascading.wensel.net/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.local.tap.aws.s3;
022
023import java.io.DataOutputStream;
024import java.io.FilterInputStream;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.OutputStream;
028import java.net.URI;
029import java.net.URISyntaxException;
030import java.util.ArrayList;
031import java.util.Iterator;
032import java.util.List;
033import java.util.Properties;
034import java.util.function.Predicate;
035import java.util.regex.Pattern;
036
037import cascading.flow.FlowProcess;
038import cascading.property.PropertyUtil;
039import cascading.scheme.FileFormat;
040import cascading.scheme.Scheme;
041import cascading.tap.SinkMode;
042import cascading.tap.Tap;
043import cascading.tap.TapException;
044import cascading.tap.local.PartitionTap;
045import cascading.tap.type.FileType;
046import cascading.tap.type.TapWith;
047import cascading.tuple.TupleEntryCollector;
048import cascading.tuple.TupleEntryIterator;
049import cascading.tuple.TupleEntrySchemeCollector;
050import cascading.tuple.TupleEntrySchemeIterator;
051import cascading.util.CloseableIterator;
052import cascading.util.Util;
053import com.amazonaws.ClientConfiguration;
054import com.amazonaws.SdkClientException;
055import com.amazonaws.client.builder.AwsClientBuilder;
056import com.amazonaws.event.ProgressEvent;
057import com.amazonaws.event.ProgressEventType;
058import com.amazonaws.services.s3.AmazonS3;
059import com.amazonaws.services.s3.AmazonS3ClientBuilder;
060import com.amazonaws.services.s3.Headers;
061import com.amazonaws.services.s3.model.AmazonS3Exception;
062import com.amazonaws.services.s3.model.ObjectMetadata;
063import com.amazonaws.services.s3.model.PutObjectRequest;
064import com.amazonaws.services.s3.model.S3ObjectSummary;
065import com.amazonaws.services.s3.transfer.PersistableTransfer;
066import com.amazonaws.services.s3.transfer.Transfer;
067import com.amazonaws.services.s3.transfer.TransferManager;
068import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
069import com.amazonaws.services.s3.transfer.Upload;
070import com.amazonaws.services.s3.transfer.internal.S3ProgressListener;
071import com.amazonaws.services.s3.transfer.model.UploadResult;
072import com.google.common.io.ByteSource;
073import com.google.common.io.FileBackedOutputStream;
074import org.slf4j.Logger;
075import org.slf4j.LoggerFactory;
076
077import static cascading.util.Util.isEmpty;
078
079/**
080 * Class S3Tap is a Cascading local-mode {@link Tap} providing read and write access to data stored in Amazon S3 buckets.
081 * <p>
082 * This Tap is not intended to be used with any of the other Cascading planners unless they specify they are local-mode
083 * compatible.
084 * <p>
085 * S3Tap can read a single key, all objects underneath a key-prefix, or all objects under a key-prefix that match
086 * a given globbing pattern.
087 * <p>
088 * See the various constructors for the available access parametrizations. Of note are the constructors that take
089 * a {@link URI} instance. The URI should be in the following format:
090 * {@code s3://[bucket]/<key|key-prefix><?glob>}
091 * <p>
 * Where bucket is the only required value. The key references a single object, the key-prefix is used to access
 * a set of objects with a common prefix value. The glob value is used to further narrow the resulting object set.
094 * <p>
095 * The globbing pattern is specified by the {@link java.nio.file.FileSystem#getPathMatcher} method.
096 * <p>
097 * This Tap was designed to allow applications to effectively poll an S3 bucket for new keys to be processed.
098 * <p>
 * When used with the {@link S3FileCheckpointer} class, a map of the keys last consumed from each bucket will be
 * tracked on disk, with the map surviving JVM restarts, allowing applications to exit and restart safely without
 * retrieving duplicate data.
102 * <p>
103 * The {@link S3Checkpointer#commit()} method is only called during a graceful shutdown of the Flow or JVM, but every
104 * consumed key is passed to the S3Checkpointer, so custom implementations can choose to persist the key more
105 * frequently.
106 * <p>
107 * AWS Credentials are handled by {@link com.amazonaws.auth.DefaultAWSCredentialsProviderChain}.
108 */
109public class S3Tap extends Tap<Properties, InputStream, OutputStream> implements FileType<Properties>, TapWith<Properties, InputStream, OutputStream>
110  {
111  /** Field LOG */
112  private static final Logger LOG = LoggerFactory.getLogger( S3Tap.class );
113
114  /** Field SEQUENCE_TOKEN */
115  public static final String SEQUENCE_TOKEN = "{sequence}";
116  /** Field MIME_DIRECTORY */
117  public static final String MIME_DIRECTORY = "application/x-directory";
118  /** Field DEFAULT_DELIMITER */
119  public static final String DEFAULT_DELIMITER = "/";
120
121  /** Field s3Client */
122  AmazonS3 s3Client;
123  /** Field bucketName */
124  String bucketName;
125  /** Field key */
126  String key;
127  /** Field filter */
128  Predicate<String> filter;
129  /** Field delimiter */
130  String delimiter = DEFAULT_DELIMITER;
131  /** Field checkpointer */
132  S3Checkpointer checkpointer;
133
134  private transient ObjectMetadata objectMetadata;
135
136  /**
137   * Method makeURI creates a new S3 URI from the given parameters.
138   *
139   * @param bucketName the S3 bucket name
140   * @param keyPrefix  the S3 object key or key-prefix
141   * @return an URI instance
142   */
143  public static URI makeURI( String bucketName, String keyPrefix )
144    {
145    return makeURI( bucketName, keyPrefix, null );
146    }
147
148  /**
149   * Method makeURI creates a new S3 URI from the given parameters.
150   *
151   * @param bucketName the S3 bucket name
152   * @param keyPrefix  the S3 object key or key-prefix
153   * @param glob       the globbing pattern to apply to the keys
154   * @return an URI instance
155   */
156  public static URI makeURI( String bucketName, String keyPrefix, String glob )
157    {
158    if( bucketName == null )
159      throw new IllegalArgumentException( "bucketName may not be null" );
160
161    try
162      {
163      if( keyPrefix == null )
164        keyPrefix = "/";
165      else if( !keyPrefix.startsWith( "/" ) )
166        keyPrefix = "/" + keyPrefix;
167
168      return new URI( "s3", bucketName, keyPrefix, glob, null );
169      }
170    catch( URISyntaxException exception )
171      {
172      throw new IllegalArgumentException( exception.getMessage(), exception );
173      }
174    }
175
176  /**
177   * Constructor S3Tap creates a new S3Tap instance.
178   *
179   * @param scheme     of Scheme
180   * @param bucketName of String
181   */
182  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName )
183    {
184    this( scheme, bucketName, null, null, null, SinkMode.KEEP );
185    }
186
187  /**
188   * Constructor S3Tap creates a new S3Tap instance.
189   *
190   * @param scheme     of Scheme
191   * @param bucketName of String
192   * @param key        of String
193   */
194  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key )
195    {
196    this( scheme, bucketName, key, DEFAULT_DELIMITER, SinkMode.KEEP );
197    }
198
199  /**
200   * Constructor S3Tap creates a new S3Tap instance.
201   *
202   * @param scheme     of Scheme
203   * @param bucketName of String
204   * @param key        of String
205   * @param delimiter  of String
206   */
207  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, String delimiter )
208    {
209    this( scheme, null, null, bucketName, key, delimiter, SinkMode.KEEP );
210    }
211
212  /**
213   * Constructor S3Tap creates a new S3Tap instance.
214   *
215   * @param scheme     of Scheme
216   * @param bucketName of String
217   * @param filter     of Predicate
218   */
219  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, Predicate<String> filter )
220    {
221    this( scheme, bucketName, null, filter, SinkMode.KEEP );
222    }
223
224  /**
225   * Constructor S3Tap creates a new S3Tap instance.
226   *
227   * @param scheme     of Scheme
228   * @param bucketName of String
229   * @param key        of String
230   * @param filter     of Predicate
231   */
232  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, Predicate<String> filter )
233    {
234    this( scheme, bucketName, key, DEFAULT_DELIMITER, filter, SinkMode.KEEP );
235    }
236
237  /**
238   * Constructor S3Tap creates a new S3Tap instance.
239   *
240   * @param scheme     of Scheme
241   * @param bucketName of String
242   * @param key        of String
243   * @param delimiter  of String
244   * @param filter     of Predicate
245   */
246  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, String delimiter, Predicate<String> filter )
247    {
248    this( scheme, null, null, bucketName, key, delimiter, filter, SinkMode.KEEP );
249    }
250
251  /**
252   * Constructor S3Tap creates a new S3Tap instance.
253   *
254   * @param scheme     of Scheme
255   * @param s3Client   of AmazonS3
256   * @param bucketName of String
257   */
258  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName )
259    {
260    this( scheme, s3Client, bucketName, null, SinkMode.KEEP );
261    }
262
263  /**
264   * Constructor S3Tap creates a new S3Tap instance.
265   *
266   * @param scheme     of Scheme
267   * @param s3Client   of AmazonS3
268   * @param bucketName of String
269   * @param key        of String
270   */
271  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key )
272    {
273    this( scheme, s3Client, bucketName, key, DEFAULT_DELIMITER, SinkMode.KEEP );
274    }
275
276  /**
277   * Constructor S3Tap creates a new S3Tap instance.
278   *
279   * @param scheme     of Scheme
280   * @param s3Client   of AmazonS3
281   * @param bucketName of String
282   * @param key        of String
283   * @param delimiter  of String
284   */
285  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter )
286    {
287    this( scheme, s3Client, bucketName, key, delimiter, null, SinkMode.KEEP );
288    }
289
290  /**
291   * Constructor S3Tap creates a new S3Tap instance.
292   *
293   * @param scheme     of Scheme
294   * @param s3Client   of AmazonS3
295   * @param bucketName of String
296   * @param key        of String
297   * @param delimiter  of String
298   * @param filter     of Predicate
299   */
300  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter, Predicate<String> filter )
301    {
302    this( scheme, s3Client, null, bucketName, key, delimiter, filter, SinkMode.KEEP );
303    }
304
305  /**
306   * Constructor S3Tap creates a new S3Tap instance.
307   *
308   * @param scheme       of Scheme
309   * @param checkpointer of S3Checkpointer
310   * @param bucketName   of String
311   */
312  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName )
313    {
314    this( scheme, checkpointer, bucketName, null, null, null, SinkMode.KEEP );
315    }
316
317  /**
318   * Constructor S3Tap creates a new S3Tap instance.
319   *
320   * @param scheme       of Scheme
321   * @param checkpointer of S3Checkpointer
322   * @param bucketName   of String
323   * @param key          of String
324   */
325  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key )
326    {
327    this( scheme, checkpointer, bucketName, key, DEFAULT_DELIMITER, SinkMode.KEEP );
328    }
329
330  /**
331   * Constructor S3Tap creates a new S3Tap instance.
332   *
333   * @param scheme       of Scheme
334   * @param checkpointer of S3Checkpointer
335   * @param bucketName   of String
336   * @param key          of String
337   * @param delimiter    of String
338   */
339  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, String delimiter )
340    {
341    this( scheme, null, checkpointer, bucketName, key, delimiter, SinkMode.KEEP );
342    }
343
344  /**
345   * Constructor S3Tap creates a new S3Tap instance.
346   *
347   * @param scheme       of Scheme
348   * @param checkpointer of S3Checkpointer
349   * @param bucketName   of String
350   * @param filter       of Predicate
351   */
352  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, Predicate<String> filter )
353    {
354    this( scheme, checkpointer, bucketName, null, filter, SinkMode.KEEP );
355    }
356
357  /**
358   * Constructor S3Tap creates a new S3Tap instance.
359   *
360   * @param scheme       of Scheme
361   * @param checkpointer of S3Checkpointer
362   * @param bucketName   of String
363   * @param key          of String
364   * @param filter       of Predicate
365   */
366  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, Predicate<String> filter )
367    {
368    this( scheme, checkpointer, bucketName, key, DEFAULT_DELIMITER, filter, SinkMode.KEEP );
369    }
370
371  /**
372   * Constructor S3Tap creates a new S3Tap instance.
373   *
374   * @param scheme       of Scheme
375   * @param checkpointer of S3Checkpointer
376   * @param bucketName   of String
377   * @param key          of String
378   * @param delimiter    of String
379   * @param filter       of Predicate
380   */
381  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, Predicate<String> filter )
382    {
383    this( scheme, null, checkpointer, bucketName, key, delimiter, filter, SinkMode.KEEP );
384    }
385
386  /**
387   * Constructor S3Tap creates a new S3Tap instance.
388   *
389   * @param scheme       of Scheme
390   * @param s3Client     of AmazonS3
391   * @param checkpointer of S3Checkpointer
392   * @param bucketName   of String
393   */
394  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName )
395    {
396    this( scheme, s3Client, checkpointer, bucketName, null, SinkMode.KEEP );
397    }
398
399  /**
400   * Constructor S3Tap creates a new S3Tap instance.
401   *
402   * @param scheme       of Scheme
403   * @param s3Client     of AmazonS3
404   * @param checkpointer of S3Checkpointer
405   * @param bucketName   of String
406   * @param key          of String
407   */
408  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key )
409    {
410    this( scheme, s3Client, checkpointer, bucketName, key, DEFAULT_DELIMITER, SinkMode.KEEP );
411    }
412
413  /**
414   * Constructor S3Tap creates a new S3Tap instance.
415   *
416   * @param scheme       of Scheme
417   * @param s3Client     of AmazonS3
418   * @param checkpointer of S3Checkpointer
419   * @param bucketName   of String
420   * @param key          of String
421   * @param delimiter    of String
422   */
423  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, String delimiter )
424    {
425    this( scheme, s3Client, checkpointer, bucketName, key, delimiter, null, SinkMode.KEEP );
426    }
427
428  /**
429   * Constructor S3Tap creates a new S3Tap instance.
430   *
431   * @param scheme       of Scheme
432   * @param s3Client     of AmazonS3
433   * @param checkpointer of S3Checkpointer
434   * @param bucketName   of String
435   * @param key          of String
436   * @param delimiter    of String
437   * @param filter       of Predicate
438   */
439  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, Predicate<String> filter )
440    {
441    this( scheme, s3Client, checkpointer, bucketName, key, delimiter, filter, SinkMode.KEEP );
442    }
443
444  /**
445   * Constructor S3Tap creates a new S3Tap instance.
446   *
447   * @param scheme     of Scheme
448   * @param bucketName of String
449   */
450  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, SinkMode sinkMode )
451    {
452    this( scheme, bucketName, null, null, null, sinkMode );
453    }
454
455  /**
456   * Constructor S3Tap creates a new S3Tap instance.
457   *
458   * @param scheme     of Scheme
459   * @param bucketName of String
460   * @param key        of String
461   */
462  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, SinkMode sinkMode )
463    {
464    this( scheme, bucketName, key, DEFAULT_DELIMITER, sinkMode );
465    }
466
467  /**
468   * Constructor S3Tap creates a new S3Tap instance.
469   *
470   * @param scheme     of Scheme
471   * @param bucketName of String
472   * @param key        of String
473   * @param delimiter  of String
474   */
475  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, String delimiter, SinkMode sinkMode )
476    {
477    this( scheme, null, null, bucketName, key, delimiter, sinkMode );
478    }
479
480  /**
481   * Constructor S3Tap creates a new S3Tap instance.
482   *
483   * @param scheme     of Scheme
484   * @param bucketName of String
485   * @param filter     of Predicate
486   */
487  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, Predicate<String> filter, SinkMode sinkMode )
488    {
489    this( scheme, bucketName, null, filter, sinkMode );
490    }
491
492  /**
493   * Constructor S3Tap creates a new S3Tap instance.
494   *
495   * @param scheme     of Scheme
496   * @param bucketName of String
497   * @param key        of String
498   * @param filter     of Predicate
499   */
500  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, Predicate<String> filter, SinkMode sinkMode )
501    {
502    this( scheme, bucketName, key, DEFAULT_DELIMITER, filter, sinkMode );
503    }
504
505  /**
506   * Constructor S3Tap creates a new S3Tap instance.
507   *
508   * @param scheme     of Scheme
509   * @param bucketName of String
510   * @param key        of String
511   * @param delimiter  of String
512   * @param filter     of Predicate
513   * @param sinkMode   of SinkMode
514   */
515  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode )
516    {
517    this( scheme, null, null, bucketName, key, delimiter, filter, sinkMode );
518    }
519
520  /**
521   * Constructor S3Tap creates a new S3Tap instance.
522   *
523   * @param scheme     of Scheme
524   * @param s3Client   of AmazonS3
525   * @param bucketName of String
526   * @param sinkMode   of SinkMode
527   */
528  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, SinkMode sinkMode )
529    {
530    this( scheme, s3Client, bucketName, null, sinkMode );
531    }
532
533  /**
534   * Constructor S3Tap creates a new S3Tap instance.
535   *
536   * @param scheme     of Scheme
537   * @param s3Client   of AmazonS3
538   * @param bucketName of String
539   * @param key        of String
540   * @param sinkMode   of SinkMode
541   */
542  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, SinkMode sinkMode )
543    {
544    this( scheme, s3Client, bucketName, key, DEFAULT_DELIMITER, sinkMode );
545    }
546
547  /**
548   * Constructor S3Tap creates a new S3Tap instance.
549   *
550   * @param scheme     of Scheme
551   * @param s3Client   of AmazonS3
552   * @param bucketName of String
553   * @param key        of String
554   * @param delimiter  of String
555   * @param sinkMode   of SinkMode
556   */
557  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter, SinkMode sinkMode )
558    {
559    this( scheme, s3Client, bucketName, key, delimiter, null, sinkMode );
560    }
561
562  /**
563   * Constructor S3Tap creates a new S3Tap instance.
564   *
565   * @param scheme     of Scheme
566   * @param s3Client   of AmazonS3
567   * @param bucketName of String
568   * @param key        of String
569   * @param delimiter  of String
570   * @param filter     of Predicate
571   * @param sinkMode   of SinkMode
572   */
573  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode )
574    {
575    this( scheme, s3Client, null, bucketName, key, delimiter, filter, sinkMode );
576    }
577
578  /**
579   * Constructor S3Tap creates a new S3Tap instance.
580   *
581   * @param scheme       of Scheme
582   * @param checkpointer of S3Checkpointer
583   * @param bucketName   of String
584   * @param sinkMode     of SinkMode
585   */
586  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, SinkMode sinkMode )
587    {
588    this( scheme, checkpointer, bucketName, null, null, null, sinkMode );
589    }
590
591  /**
592   * Constructor S3Tap creates a new S3Tap instance.
593   *
594   * @param scheme       of Scheme
595   * @param checkpointer of S3Checkpointer
596   * @param bucketName   of String
597   * @param key          of String
598   * @param sinkMode     of SinkMode
599   */
600  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, SinkMode sinkMode )
601    {
602    this( scheme, checkpointer, bucketName, key, DEFAULT_DELIMITER, sinkMode );
603    }
604
605  /**
606   * Constructor S3Tap creates a new S3Tap instance.
607   *
608   * @param scheme       of Scheme
609   * @param checkpointer of S3Checkpointer
610   * @param bucketName   of String
611   * @param key          of String
612   * @param delimiter    of String
613   * @param sinkMode     of SinkMode
614   */
615  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, SinkMode sinkMode )
616    {
617    this( scheme, null, checkpointer, bucketName, key, delimiter, sinkMode );
618    }
619
620  /**
621   * Constructor S3Tap creates a new S3Tap instance.
622   *
623   * @param scheme       of Scheme
624   * @param checkpointer of S3Checkpointer
625   * @param bucketName   of String
626   * @param filter       of Predicate
627   * @param sinkMode     of SinkMode
628   */
629  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, Predicate<String> filter, SinkMode sinkMode )
630    {
631    this( scheme, checkpointer, bucketName, null, filter, sinkMode );
632    }
633
634  /**
635   * Constructor S3Tap creates a new S3Tap instance.
636   *
637   * @param scheme       of Scheme
638   * @param checkpointer of S3Checkpointer
639   * @param bucketName   of String
640   * @param key          of String
641   * @param filter       of Predicate
642   * @param sinkMode     of SinkMode
643   */
644  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, Predicate<String> filter, SinkMode sinkMode )
645    {
646    this( scheme, checkpointer, bucketName, key, DEFAULT_DELIMITER, filter, sinkMode );
647    }
648
649  /**
650   * Constructor S3Tap creates a new S3Tap instance.
651   *
652   * @param scheme       of Scheme
653   * @param checkpointer of S3Checkpointer
654   * @param bucketName   of String
655   * @param key          of String
656   * @param delimiter    of String
657   * @param filter       of Predicate
658   * @param sinkMode     of SinkMode
659   */
660  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode )
661    {
662    this( scheme, null, checkpointer, bucketName, key, delimiter, filter, sinkMode );
663    }
664
665  /**
666   * Constructor S3Tap creates a new S3Tap instance.
667   *
668   * @param scheme       of Scheme
669   * @param s3Client     of AmazonS3
670   * @param checkpointer of S3Checkpointer
671   * @param bucketName   of String
672   * @param sinkMode     of SinkMode
673   */
674  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, SinkMode sinkMode )
675    {
676    this( scheme, s3Client, checkpointer, bucketName, null, sinkMode );
677    }
678
679  /**
680   * Constructor S3Tap creates a new S3Tap instance.
681   *
682   * @param scheme       of Scheme
683   * @param s3Client     of AmazonS3
684   * @param checkpointer of S3Checkpointer
685   * @param bucketName   of String
686   * @param key          of String
687   * @param sinkMode     of SinkMode
688   */
689  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, SinkMode sinkMode )
690    {
691    this( scheme, s3Client, checkpointer, bucketName, key, DEFAULT_DELIMITER, sinkMode );
692    }
693
694  /**
695   * Constructor S3Tap creates a new S3Tap instance.
696   *
697   * @param scheme       of Scheme
698   * @param s3Client     of AmazonS3
699   * @param checkpointer of S3Checkpointer
700   * @param bucketName   of String
701   * @param key          of String
702   * @param delimiter    of String
703   * @param sinkMode     of SinkMode
704   */
705  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, SinkMode sinkMode )
706    {
707    this( scheme, s3Client, checkpointer, bucketName, key, delimiter, null, sinkMode );
708    }
709
710  /**
711   * Constructor S3Tap creates a new S3Tap instance.
712   *
713   * @param scheme       of Scheme
714   * @param s3Client     of AmazonS3
715   * @param checkpointer of S3Checkpointer
716   * @param bucketName   of String
717   * @param key          of String
718   * @param delimiter    of String
719   * @param filter       of Predicate
720   * @param sinkMode     of SinkMode
721   */
722  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode )
723    {
724    super( scheme, sinkMode );
725    this.s3Client = s3Client;
726    this.checkpointer = checkpointer;
727    this.bucketName = bucketName;
728
729    if( isEmpty( this.bucketName ) )
730      throw new IllegalArgumentException( "bucket name may not be null or empty" );
731
732    this.key = key;
733    this.delimiter = delimiter;
734    this.filter = filter;
735    }
736
737  /**
738   * Constructor S3Tap creates a new S3Tap instance.
739   *
740   * @param scheme     of Scheme
741   * @param identifier of URI
742   */
743  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, URI identifier )
744    {
745    this( scheme, null, null, identifier, SinkMode.KEEP );
746    }
747
748  /**
749   * Constructor S3Tap creates a new S3Tap instance.
750   *
751   * @param scheme     of Scheme
752   * @param s3Client   of AmazonS3
753   * @param identifier of URI
754   */
755  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, URI identifier )
756    {
757    this( scheme, s3Client, null, identifier, SinkMode.KEEP );
758    }
759
760  /**
761   * Constructor S3Tap creates a new S3Tap instance.
762   *
763   * @param scheme       of Scheme
764   * @param checkpointer of S3Checkpointer
765   * @param identifier   of URI
766   */
767  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, URI identifier )
768    {
769    this( scheme, null, checkpointer, identifier, SinkMode.KEEP );
770    }
771
772  /**
773   * Constructor S3Tap creates a new S3Tap instance.
774   *
775   * @param scheme       of Scheme
776   * @param s3Client     of AmazonS3
777   * @param checkpointer of S3Checkpointer
778   * @param identifier   of URI
779   */
780  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, URI identifier )
781    {
782    this( scheme, s3Client, checkpointer, identifier, SinkMode.KEEP );
783    }
784
785  /**
786   * Constructor S3Tap creates a new S3Tap instance.
787   *
788   * @param scheme     of Scheme
789   * @param identifier of URI
790   * @param sinkMode   of SinkMode
791   */
792  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, URI identifier, SinkMode sinkMode )
793    {
794    this( scheme, null, null, identifier, sinkMode );
795    }
796
797  /**
798   * Constructor S3Tap creates a new S3Tap instance.
799   *
800   * @param scheme     of Scheme
801   * @param s3Client   of AmazonS3
802   * @param identifier of URI
803   * @param sinkMode   of SinkMode
804   */
805  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, URI identifier, SinkMode sinkMode )
806    {
807    this( scheme, s3Client, null, identifier, sinkMode );
808    }
809
810  /**
811   * Constructor S3Tap creates a new S3Tap instance.
812   *
813   * @param scheme       of Scheme
814   * @param checkpointer of S3Checkpointer
815   * @param identifier   of URI
816   * @param sinkMode     of SinkMode
817   */
818  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, URI identifier, SinkMode sinkMode )
819    {
820    this( scheme, null, checkpointer, identifier, sinkMode );
821    }
822
823  /**
824   * Constructor S3Tap creates a new S3Tap instance.
825   *
826   * @param scheme       of Scheme
827   * @param s3Client     of AmazonS3
828   * @param checkpointer of S3Checkpointer
829   * @param identifier   of URI
830   * @param sinkMode     of SinkMode
831   */
832  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, URI identifier, SinkMode sinkMode )
833    {
834    super( scheme, sinkMode );
835    this.s3Client = s3Client;
836    this.checkpointer = checkpointer;
837
838    if( identifier == null )
839      throw new IllegalArgumentException( "identifier may not be null" );
840
841    if( !identifier.getScheme().equalsIgnoreCase( "s3" ) )
842      throw new IllegalArgumentException( "identifier does not have s3 scheme" );
843
844    this.bucketName = getBucketNameFor( identifier );
845
846    if( isEmpty( this.bucketName ) )
847      throw new IllegalArgumentException( "bucket name may not be null or empty" + identifier );
848
849    this.key = cleanKey( identifier );
850
851    if( identifier.getQuery() != null )
852      filter = globPredicate( identifier.getQuery() );
853    }
854
855  protected String getBucketNameFor( URI identifier )
856    {
857    String authority = identifier.getAuthority();
858
859    if( isEmpty( authority ) )
860      throw new IllegalArgumentException( "identifier must have an authority: " + identifier );
861
862    int pos = authority.indexOf( '@' );
863
864    if( pos != -1 )
865      return authority.substring( pos + 1 );
866
867    return authority;
868    }
869
870  private static Predicate<String> globPredicate( String glob )
871    {
872    String regex = getRegexForGlob( glob );
873    Pattern pattern = Pattern.compile( regex );
874
875    return string -> pattern.matcher( string ).matches();
876    }
877
  /**
   * Method getRegexForGlob translates the given glob into a regular expression by invoking
   * the static method {@code toUnixRegexPattern} on the class {@code sun.nio.fs.Globs} by name.
   *
   * @param glob of String
   * @return the regular expression (type String)
   */
  private static String getRegexForGlob( String glob )
    {
    // NOTE(review): sun.nio.fs.Globs is a JDK internal class resolved by name at runtime;
    // presumably this fails on runtimes that restrict access to internal packages -- confirm for the target JDK
    return (String) Util.invokeStaticMethod(
      "sun.nio.fs.Globs",
      "toUnixRegexPattern",
      new Object[]{glob},
      new Class[]{String.class}
    );
    }
887
  /**
   * Method withScheme returns a tap created from the given scheme and this tap's client,
   * bucket name, key, delimiter, filter and sink mode.
   *
   * @param scheme of Scheme
   * @return a TapWith created via {@code create}
   */
  @Override
  public TapWith<Properties, InputStream, OutputStream> withScheme( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme )
    {
    // don't lazily create s3Client: the field is passed as is (possibly null) instead of calling getS3Client
    return create( scheme, s3Client, getBucketName(), getKey(), getDelimiter(), getFilter(), getSinkMode() );
    }
894
895  @Override
896  public TapWith<Properties, InputStream, OutputStream> withChildIdentifier( String identifier )
897    {
898    URI uri;
899
900    if( identifier.startsWith( "s3://" ) )
901      uri = URI.create( identifier );
902    else if( identifier.startsWith( getBucketName() ) )
903      uri = makeURI( identifier, null );
904    else
905      uri = makeURI( getBucketName(), getKey() + ( identifier.startsWith( delimiter ) ? identifier : delimiter + identifier ) );
906
907    // don't lazily create s3Client
908    return create( getScheme(), s3Client, uri, getSinkMode() );
909    }
910
  /**
   * Method create is the factory used when deriving a tap from a URI identifier.
   * <p>
   * The constructor called here receives no S3Checkpointer.
   *
   * @param scheme     of Scheme
   * @param s3Client   of AmazonS3, may be null
   * @param identifier of URI
   * @param sinkMode   of SinkMode
   * @return a new S3Tap instance
   */
  protected TapWith<Properties, InputStream, OutputStream> create( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, URI identifier, SinkMode sinkMode )
    {
    return new S3Tap( scheme, s3Client, identifier, sinkMode );
    }
915
  /**
   * Method withSinkMode returns a tap created from the given sink mode and this tap's scheme,
   * client, bucket name, key, delimiter and filter.
   *
   * @param sinkMode of SinkMode
   * @return a TapWith created via {@code create}
   */
  @Override
  public TapWith<Properties, InputStream, OutputStream> withSinkMode( SinkMode sinkMode )
    {
    // don't lazily create s3Client: the field is passed as is (possibly null) instead of calling getS3Client
    return create( getScheme(), s3Client, getBucketName(), getKey(), getDelimiter(), getFilter(), sinkMode );
    }
922
  /**
   * Method create is the factory used when deriving a tap from already resolved bucket name,
   * key, delimiter and filter values.
   *
   * @param scheme     of Scheme
   * @param s3Client   of AmazonS3, may be null
   * @param bucketName of String
   * @param key        of String
   * @param delimiter  of String
   * @param filter     of Predicate
   * @param sinkMode   of SinkMode
   * @return a new S3Tap instance
   */
  protected TapWith<Properties, InputStream, OutputStream> create( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode )
    {
    return new S3Tap( scheme, s3Client, bucketName, key, delimiter, filter, sinkMode );
    }
927
928  protected String cleanKey( URI identifier )
929    {
930    String path = identifier.normalize().getPath();
931
932    if( path.startsWith( "/" ) )
933      path = path.substring( 1 );
934
935    return path;
936    }
937
938  protected AmazonS3 getS3Client( Properties properties )
939    {
940    // return provided client
941    if( s3Client != null )
942      return s3Client;
943
944    AmazonS3ClientBuilder standard = AmazonS3ClientBuilder.standard();
945
946    if( properties != null )
947      {
948      String endpoint = properties.getProperty( S3TapProps.S3_ENDPOINT );
949      String region = properties.getProperty( S3TapProps.S3_REGION, "us-east-1" );
950
951      if( properties.containsKey( S3TapProps.S3_PROXY_HOST ) )
952        {
953        ClientConfiguration config = new ClientConfiguration()
954          .withProxyHost( properties.getProperty( S3TapProps.S3_PROXY_HOST ) )
955          .withProxyPort( PropertyUtil.getIntProperty( properties, S3TapProps.S3_PROXY_PORT, -1 ) );
956
957        standard.withClientConfiguration( config );
958        }
959
960      if( endpoint != null )
961        standard.withEndpointConfiguration( new AwsClientBuilder.EndpointConfiguration( endpoint, region ) );
962      else
963        standard.setRegion( region );
964
965      if( Boolean.parseBoolean( properties.getProperty( S3TapProps.S3_PATH_STYLE_ACCESS, "false" ) ) )
966        standard.enablePathStyleAccess();
967      }
968
969    return standard.build();
970    }
971
972  /**
973   * Method getCheckpointer returns the checkpointer of this S3Tap object.
974   *
975   * @return the checkpointer (type S3Checkpointer) of this S3Tap object.
976   */
977  public S3Checkpointer getCheckpointer()
978    {
979    return checkpointer;
980    }
981
982  /**
983   * Method getBucketName returns the bucketName of this S3Tap object.
984   *
985   * @return the bucketName (type String) of this S3Tap object.
986   */
987  public String getBucketName()
988    {
989    return bucketName;
990    }
991
992  /**
993   * Method getKey returns the key of this S3Tap object.
994   *
995   * @return the key (type String) of this S3Tap object.
996   */
997  public String getKey()
998    {
999    return key;
1000    }
1001
1002  protected String getMarker()
1003    {
1004    if( checkpointer != null )
1005      return checkpointer.getLastKey( getBucketName() );
1006
1007    return null;
1008    }
1009
1010  protected void setLastMarker( String marker )
1011    {
1012    if( checkpointer != null )
1013      checkpointer.setLastKey( getBucketName(), marker );
1014    }
1015
1016  protected void commitMarker()
1017    {
1018    if( checkpointer != null )
1019      checkpointer.commit();
1020    }
1021
1022  /**
1023   * Method getFilter returns the filter of this S3Tap object.
1024   *
1025   * @return the filter (type Predicate) of this S3Tap object.
1026   */
1027  public Predicate<String> getFilter()
1028    {
1029    return filter;
1030    }
1031
1032  /**
1033   * Method getDelimiter returns the delimiter of this S3Tap object.
1034   *
1035   * @return the delimiter (type String) of this S3Tap object.
1036   */
1037  public String getDelimiter()
1038    {
1039    return delimiter;
1040    }
1041
  /**
   * Method getIdentifier returns the identifier of this S3Tap object, formatted from the
   * bucket name and key as {@code s3://bucket/key}.
   *
   * @return the identifier (type String) of this S3Tap object.
   */
  @Override
  public String getIdentifier()
    {
    return makeStringIdentifier( getBucketName(), getKey() );
    }
1047
  /**
   * Method getFullIdentifier returns the same value as {@link #getIdentifier()}, the given
   * configuration is not consulted.
   *
   * @param conf of Properties
   * @return the identifier (type String) of this S3Tap object.
   */
  @Override
  public String getFullIdentifier( Properties conf )
    {
    return getIdentifier();
    }
1053
1054  @Override
1055  public boolean deleteResource( Properties conf ) throws IOException
1056    {
1057    AmazonS3 s3Client = getS3Client( conf );
1058
1059    try
1060      {
1061      s3Client.deleteObject( getBucketName(), getKey() );
1062      }
1063    catch( AmazonS3Exception exception )
1064      {
1065      throw handleException( s3Client, exception );
1066      }
1067
1068    return true;
1069    }
1070
1071  @Override
1072  public boolean createResource( Properties conf ) throws IOException
1073    {
1074    AmazonS3 s3Client = getS3Client( conf );
1075
1076    try
1077      {
1078      s3Client.putObject( getBucketName(), getKey(), "" );
1079      }
1080    catch( AmazonS3Exception exception )
1081      {
1082      throw handleException( s3Client, exception );
1083      }
1084
1085    return true;
1086    }
1087
1088  protected ObjectMetadata getObjectMetadata( Properties conf )
1089    {
1090    try
1091      {
1092      if( objectMetadata == null )
1093        objectMetadata = getS3Client( conf ).getObjectMetadata( getBucketName(), getKey() );
1094
1095      return objectMetadata;
1096      }
1097    catch( AmazonS3Exception exception )
1098      {
1099      throw handleException( getS3Client( conf ), exception );
1100      }
1101    }
1102
  /**
   * Class CheckedFilterInputStream is a FilterInputStream with a public constructor, it adds no
   * behavior of its own. It serves as the base of the anonymous stream created in openForRead.
   */
  private class CheckedFilterInputStream extends FilterInputStream
    {
    /**
     * @param inputStream the stream to wrap
     */
    public CheckedFilterInputStream( InputStream inputStream )
      {
      super( inputStream );
      }
    }
1110
1111  @Override
1112  public TupleEntryIterator openForRead( FlowProcess<? extends Properties> flowProcess, InputStream input ) throws IOException
1113    {
1114    AmazonS3 s3Client = getS3Client( flowProcess.getConfig() );
1115
1116    final String[] identifier = new String[ 1 ];
1117
1118    CloseableIterator<InputStream> iterator = new CloseableIterator<InputStream>()
1119      {
1120      S3Iterable iterable = S3Iterable.iterable( s3Client, getBucketName(), getKey() )
1121        .withFilter( getFilter() )
1122        .withMarker( getMarker() );
1123
1124      Iterator<S3ObjectSummary> iterator = iterable.iterator();
1125      InputStream lastInputStream;
1126
1127      @Override
1128      public boolean hasNext()
1129        {
1130        return iterator.hasNext();
1131        }
1132
1133      @Override
1134      public InputStream next()
1135        {
1136        safeClose();
1137
1138        S3ObjectSummary objectSummary = iterator.next();
1139
1140        identifier[ 0 ] = makeStringIdentifier( objectSummary.getBucketName(), objectSummary.getKey() );
1141
1142        flowProcess.getFlowProcessContext().setSourcePath( identifier[ 0 ] );
1143
1144        if( LOG.isDebugEnabled() )
1145          LOG.debug( "s3 retrieving: {}/{}, with size: {}", objectSummary.getBucketName(), objectSummary.getKey(), objectSummary.getSize() );
1146
1147        // getObject does not seem to fill the InputStream, nor does the InputStream support marking
1148        // may make sense to wrap this iterator in a iterate ahead iterator that attempts to pre-fetch objects in a different thread
1149        lastInputStream = new CheckedFilterInputStream( s3Client.getObject( objectSummary.getBucketName(), objectSummary.getKey() ).getObjectContent() )
1150          {
1151          @Override
1152          public void close() throws IOException
1153            {
1154            setLastMarker( objectSummary.getKey() );
1155            super.close();
1156            }
1157          };
1158
1159        return lastInputStream;
1160        }
1161
1162      private void safeClose()
1163        {
1164        try
1165          {
1166          if( lastInputStream != null )
1167            lastInputStream.close();
1168
1169          lastInputStream = null;
1170          }
1171        catch( IOException exception )
1172          {
1173          // do nothing
1174          }
1175        }
1176
1177      @Override
1178      public void close()
1179        {
1180        safeClose();
1181        commitMarker();
1182        }
1183      };
1184
1185    return new TupleEntrySchemeIterator<Properties, InputStream>( flowProcess, this, getScheme(), iterator, () -> identifier[ 0 ] );
1186    }
1187
1188  @Override
1189  public TupleEntryCollector openForWrite( FlowProcess<? extends Properties> flowProcess, OutputStream outputStream ) throws IOException
1190    {
1191    AmazonS3 s3Client = getS3Client( flowProcess.getConfig() );
1192
1193    if( !s3Client.doesBucketExistV2( getBucketName() ) )
1194      s3Client.createBucket( getBucketName() );
1195
1196    final String key = resolveKey( flowProcess, getKey() );
1197
1198    FileBackedOutputStream fileBackedOutputStream = new FileBackedOutputStream( 512_000, true );
1199    DataOutputStream dataOutputStream = new DataOutputStream( fileBackedOutputStream );
1200    ByteSource byteSource = fileBackedOutputStream.asByteSource();
1201
1202    TransferManager transferManager = TransferManagerBuilder.standard().withS3Client( s3Client ).build();
1203
1204    final String loggableIdentifier = makeStringIdentifier( getBucketName(), key );
1205
1206    return new TupleEntrySchemeCollector<Properties, OutputStream>( flowProcess, this, getScheme(), dataOutputStream, loggableIdentifier )
1207      {
1208      @Override
1209      public void close()
1210        {
1211        super.close(); // flushes and closes output
1212
1213        LOG.info( "s3 starting async upload: {}", loggableIdentifier );
1214
1215        InputStream inputStream = openInputStream( byteSource, loggableIdentifier );
1216
1217        try
1218          {
1219          ObjectMetadata metadata = new ObjectMetadata();
1220          metadata.setHeader( Headers.CONTENT_LENGTH, (long) dataOutputStream.size() );
1221
1222          Upload upload = createUpload( key, transferManager, new PutObjectRequest( getBucketName(), key, inputStream, metadata ) );
1223
1224          UploadResult uploadResult = upload.waitForUploadResult(); // never return null, throws an exception
1225
1226          handleResult( upload, uploadResult, loggableIdentifier );
1227          }
1228        catch( SdkClientException exception )
1229          {
1230          LOG.error( "s3 upload failed on: " + loggableIdentifier, exception );
1231          throw new TapException( "s3 upload failed on: " + loggableIdentifier, exception );
1232          }
1233        catch( InterruptedException exception )
1234          {
1235          // ignore
1236          }
1237        finally
1238          {
1239          transferManager.shutdownNow( false );
1240          }
1241        }
1242      };
1243    }
1244
1245  protected void handleResult( Upload upload, UploadResult uploadResult, String loggableIdentifier )
1246    {
1247    Transfer.TransferState state = upload.getState();
1248
1249    if( state == Transfer.TransferState.Canceled )
1250      {
1251      LOG.warn( "s3 canceled upload: {}, with key: {}", getIdentifier(), uploadResult.getKey() );
1252      }
1253    else if( state == Transfer.TransferState.Failed ) // can this happen?
1254      {
1255      LOG.error( "s3 failed upload: {}, with key: {}", getIdentifier(), uploadResult.getKey() );
1256      throw new TapException( "s3 upload failed on: " + loggableIdentifier );
1257      }
1258    else
1259      {
1260      LOG.info( "s3 completed upload: {}, with key: {}", getIdentifier(), uploadResult.getKey() );
1261      }
1262    }
1263
1264  protected InputStream openInputStream( ByteSource byteSource, String loggableIdentifier )
1265    {
1266    InputStream inputStream;
1267    try
1268      {
1269      inputStream = byteSource.openBufferedStream();
1270      }
1271    catch( IOException exception )
1272      {
1273      LOG.error( "s3 upload failed on: " + loggableIdentifier, exception );
1274      throw new TapException( "s3 upload failed on: " + loggableIdentifier, exception );
1275      }
1276
1277    return inputStream;
1278    }
1279
1280  protected Upload createUpload( String key, TransferManager transferManager, PutObjectRequest request )
1281    {
1282    return transferManager.upload( request, new S3ProgressListener()
1283      {
1284      @Override
1285      public void onPersistableTransfer( PersistableTransfer persistableTransfer )
1286        {
1287        if( LOG.isDebugEnabled() )
1288          LOG.debug( "s3 for: {}, persistable transfer: {}", key, persistableTransfer );
1289        }
1290
1291      @Override
1292      public void progressChanged( ProgressEvent progressEvent )
1293        {
1294        if( progressEvent.getEventType() == ProgressEventType.TRANSFER_FAILED_EVENT )
1295          LOG.error( "s3 for: {}, event: {}", key, progressEvent );
1296        if( progressEvent.getEventType() == ProgressEventType.TRANSFER_CANCELED_EVENT )
1297          LOG.warn( "s3 for: {}, event: {}", key, progressEvent );
1298        if( progressEvent.getEventType() == ProgressEventType.TRANSFER_PART_FAILED_EVENT )
1299          LOG.warn( "s3 for: {}, event: {}", key, progressEvent );
1300        else if( LOG.isDebugEnabled() )
1301          LOG.debug( "s3 for: {}, event: {}", key, progressEvent );
1302        }
1303      } );
1304    }
1305
1306  protected String resolveKey( FlowProcess<? extends Properties> flowProcess, String key )
1307    {
1308    int partNum = flowProcess.getIntegerProperty( PartitionTap.PART_NUM_PROPERTY, 0 );
1309
1310    key = key.replace( SEQUENCE_TOKEN, String.format( "%05d", partNum ) );
1311
1312    if( getScheme() instanceof FileFormat )
1313      return key + "." + ( (FileFormat) getScheme() ).getExtension();
1314
1315    return key;
1316    }
1317
1318  @Override
1319  public boolean resourceExists( Properties conf ) throws IOException
1320    {
1321    AmazonS3 s3Client = getS3Client( conf );
1322
1323    try
1324      {
1325      if( getKey() == null )
1326        return s3Client.doesBucketExistV2( getBucketName() );
1327
1328      return s3Client.doesObjectExist( getBucketName(), getKey() );
1329      }
1330    catch( AmazonS3Exception exception )
1331      {
1332      throw handleException( s3Client, exception );
1333      }
1334    }
1335
1336  protected AmazonS3Exception handleException( AmazonS3 s3Client, AmazonS3Exception exception )
1337    {
1338    if( exception.getStatusCode() == 400 )
1339      {
1340      LOG.error( "s3 request failed, try changing the AWS Region from: {}, using property: {}", s3Client.getRegionName(), S3TapProps.S3_REGION, exception );
1341      }
1342
1343    return exception;
1344    }
1345
1346  @Override
1347  public long getModifiedTime( Properties conf ) throws IOException
1348    {
1349    return getObjectMetadata( conf ).getLastModified().getTime();
1350    }
1351
1352  @Override
1353  public boolean isDirectory( FlowProcess<? extends Properties> flowProcess ) throws IOException
1354    {
1355    return MIME_DIRECTORY.equalsIgnoreCase( getObjectMetadata( flowProcess.getConfig() ).getContentType() );
1356    }
1357
1358  @Override
1359  public boolean isDirectory( Properties conf ) throws IOException
1360    {
1361    return isDirectory( FlowProcess.nullFlowProcess() );
1362    }
1363
1364  @Override
1365  public String[] getChildIdentifiers( FlowProcess<? extends Properties> flowProcess ) throws IOException
1366    {
1367    return getChildIdentifiers( flowProcess.getConfig() );
1368    }
1369
  /**
   * Method getChildIdentifiers returns the child identifiers of this tap, listed with a
   * depth of 1 and not fully qualified.
   *
   * @param conf of Properties
   * @return the child identifiers (type String[])
   * @throws IOException declared by the overridden method
   */
  @Override
  public String[] getChildIdentifiers( Properties conf ) throws IOException
    {
    return getChildIdentifiers( conf, 1, false );
    }
1375
1376  @Override
1377  public String[] getChildIdentifiers( FlowProcess<? extends Properties> flowProcess, int depth, boolean fullyQualified ) throws IOException
1378    {
1379    return getChildIdentifiers( flowProcess.getConfig(), depth, fullyQualified );
1380    }
1381
1382  @Override
1383  public String[] getChildIdentifiers( Properties conf, int depth, boolean fullyQualified ) throws IOException
1384    {
1385    if( !resourceExists( conf ) )
1386      return new String[ 0 ];
1387
1388    S3Iterable objects = S3Iterable.iterable( getS3Client( conf ), getBucketName(), getKey() )
1389      .withDelimiter( getDelimiter() )
1390      .withMaxDepth( depth )
1391      .withFilter( getFilter() )
1392      .withMarker( getMarker() );
1393
1394    Iterator<S3ObjectSummary> iterator = objects.iterator();
1395
1396    List<String> results = new ArrayList<>();
1397
1398    while( iterator.hasNext() )
1399      results.add( makePath( iterator, fullyQualified ) );
1400
1401    return results.toArray( new String[ results.size() ] );
1402    }
1403
1404  protected String makePath( Iterator<S3ObjectSummary> iterator, boolean fullyQualified )
1405    {
1406    String key = iterator.next().getKey();
1407
1408    if( fullyQualified )
1409      return makeStringIdentifier( getBucketName(), key );
1410
1411    return key.substring( getKey().length() );
1412    }
1413
1414  @Override
1415  public long getSize( FlowProcess<? extends Properties> flowProcess ) throws IOException
1416    {
1417    return getSize( flowProcess.getConfig() );
1418    }
1419
1420  @Override
1421  public long getSize( Properties conf ) throws IOException
1422    {
1423    if( isDirectory( conf ) )
1424      return 0;
1425
1426    return getObjectMetadata( conf ).getInstanceLength();
1427    }
1428
1429  protected static String makeStringIdentifier( String bucketName, String keyPrefix )
1430    {
1431    if( isEmpty( keyPrefix ) )
1432      return String.format( "s3://%s/", bucketName );
1433
1434    return String.format( "s3://%s/%s", bucketName, keyPrefix );
1435    }
1436  }