001/*
002 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
003 *
004 * Project and contact information: https://cascading.wensel.net/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.local.tap.aws.s3;
022
023import java.io.IOException;
024import java.io.UncheckedIOException;
025import java.nio.file.Files;
026import java.nio.file.Path;
027import java.nio.file.Paths;
028import java.util.Collections;
029import java.util.LinkedHashMap;
030import java.util.Map;
031import java.util.function.Function;
032
033/**
034 * Class S3FileCheckpointer persists a map of bucket names and last seen keys to disk
035 * so multiple starts of a given application or new Flow instances relying on a {@link S3Tap}
036 * will re-retrieve the same data as previous executions.
037 * <p>
038 * By default this class will write all checkpoints to the directory {@code [user.home]/.cascading/s3tap-checkpoints}
039 * in files named {@code [bucket-name].txt}.
040 * <p>
041 * A {@link Function} can be supplied to override the filename creation.
042 */
043public class S3FileCheckpointer implements S3Checkpointer
044  {
045  Map<String, String> seenKeys = new LinkedHashMap<>();
046
047  Path path = makeHidden( homeDir() );
048  Function<String, String> filename = bucket -> bucket + ".txt";
049
050  /**
051   * Method homeDir uses the System property {@code user.home} to retrieve the user's home directory
052   *
053   * @return Path
054   */
055
056  public static Path homeDir()
057    {
058    return Paths.get( System.getProperty( "user.home" ) );
059    }
060
061  /**
062   * Method currentDir uses the System property {@code user.dir} to retrieve the user's current working directory
063   *
064   * @return Path
065   */
066  public static Path currentDir()
067    {
068    return Paths.get( System.getProperty( "user.dir" ) );
069    }
070
071  /**
072   * Method makeHidden will append {@code .cascading/s3tap-checkpoints} to the given Path instance.
073   *
074   * @param path of Path
075   * @return Path
076   */
077  public static Path makeHidden( Path path )
078    {
079    return path.resolve( ".cascading" ).resolve( "s3tap-checkpoints" );
080    }
081
082  /**
083   * Constructor S3FileCheckpointer creates a new S3FileCheckpointer instance.
084   */
085  public S3FileCheckpointer()
086    {
087    }
088
089  /**
090   * Constructor S3FileCheckpointer creates a new S3FileCheckpointer instance.
091   *
092   * @param path of String
093   */
094  public S3FileCheckpointer( String path )
095    {
096    this( Paths.get( path ) );
097    }
098
099  /**
100   * Constructor S3FileCheckpointer creates a new S3FileCheckpointer instance.
101   *
102   * @param path of Path
103   */
104  public S3FileCheckpointer( Path path )
105    {
106    this.path = path;
107    }
108
109  /**
110   * Constructor S3FileCheckpointer creates a new S3FileCheckpointer instance.
111   *
112   * @param filename of Function
113   */
114  public S3FileCheckpointer( Function<String, String> filename )
115    {
116    this.filename = filename;
117    }
118
119  /**
120   * Constructor S3FileCheckpointer creates a new S3FileCheckpointer instance.
121   *
122   * @param path     of Path
123   * @param filename of Function
124   */
125  public S3FileCheckpointer( Path path, Function<String, String> filename )
126    {
127    this.path = path;
128    this.filename = filename;
129    }
130
131  @Override
132  public String getLastKey( String bucketName )
133    {
134    Path input = getPathFor( bucketName );
135
136    if( !Files.exists( input ) )
137      return null;
138
139    try
140      {
141      return Files.lines( input ).findFirst().orElse( null );
142      }
143    catch( IOException exception )
144      {
145      throw new UncheckedIOException( exception );
146      }
147    }
148
149  private Path getPathFor( String bucketName )
150    {
151    return path.resolve( filename.apply( bucketName ) );
152    }
153
154  @Override
155  public void setLastKey( String bucketName, String key )
156    {
157    seenKeys.put( bucketName, key );
158    }
159
160  @Override
161  public void commit()
162    {
163    for( Map.Entry<String, String> entry : seenKeys.entrySet() )
164      {
165      try
166        {
167        Path bucketPath = getPathFor( entry.getKey() );
168
169        Files.createDirectories( bucketPath.getParent() );
170
171        Files.write( bucketPath, Collections.singleton( entry.getValue() ) );
172        }
173      catch( IOException exception )
174        {
175        throw new UncheckedIOException( exception );
176        }
177      }
178
179    seenKeys.clear();
180    }
181  }