/*
 * Copyright (c) 2007-2022 The Cascading Authors. All Rights Reserved.
 *
 * Project and contact information: https://cascading.wensel.net/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.local.tap.aws.s3;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Stream;

/**
 * Class S3FileCheckpointer persists a map of bucket names and last seen keys to disk
 * so that later starts of a given application, or new Flow instances relying on a {@link S3Tap},
 * can read back the last key recorded by a previous execution.
 * <p>
 * By default this class will write all checkpoints to the directory {@code [user.home]/.cascading/s3tap-checkpoints}
 * in files named {@code [bucket-name].txt}.
 * <p>
 * A {@link Function} can be supplied to override the filename creation.
 * <p>
 * Keys handed to {@link #setLastKey(String, String)} are only held in memory; they are written to disk
 * when {@link #commit()} is called.
 */
public class S3FileCheckpointer implements S3Checkpointer
  {
  /** Pending bucket name to last key entries, held in insertion order until {@link #commit()} writes them out. */
  Map<String, String> seenKeys = new LinkedHashMap<>();

  /** Directory the checkpoint files are resolved against. */
  Path path = makeHidden( homeDir() );
  /** Maps a bucket name to the name of its checkpoint file. */
  Function<String, String> filename = bucket -> bucket + ".txt";

  /**
   * Method homeDir uses the System property {@code user.home} to retrieve the user's home directory
   *
   * @return Path
   */
  public static Path homeDir()
    {
    return Paths.get( System.getProperty( "user.home" ) );
    }

  /**
   * Method currentDir uses the System property {@code user.dir} to retrieve the user's current working directory
   *
   * @return Path
   */
  public static Path currentDir()
    {
    return Paths.get( System.getProperty( "user.dir" ) );
    }

  /**
   * Method makeHidden will append {@code .cascading/s3tap-checkpoints} to the given Path instance.
   *
   * @param path of Path
   * @return Path
   */
  public static Path makeHidden( Path path )
    {
    return path.resolve( ".cascading" ).resolve( "s3tap-checkpoints" );
    }

  /**
   * Constructor S3FileCheckpointer creates a new S3FileCheckpointer instance using the default
   * checkpoint directory and the default {@code [bucket-name].txt} file naming.
   */
  public S3FileCheckpointer()
    {
    }

  /**
   * Constructor S3FileCheckpointer creates a new S3FileCheckpointer instance writing into the given directory.
   *
   * @param path of String, converted with {@link Paths#get(String, String...)}
   */
  public S3FileCheckpointer( String path )
    {
    this( Paths.get( path ) );
    }

  /**
   * Constructor S3FileCheckpointer creates a new S3FileCheckpointer instance writing into the given directory.
   * The default file naming is retained.
   *
   * @param path of Path
   */
  public S3FileCheckpointer( Path path )
    {
    this.path = path;
    }

  /**
   * Constructor S3FileCheckpointer creates a new S3FileCheckpointer instance using the given
   * bucket name to file name mapping. The default checkpoint directory is retained.
   *
   * @param filename of Function
   */
  public S3FileCheckpointer( Function<String, String> filename )
    {
    this.filename = filename;
    }

  /**
   * Constructor S3FileCheckpointer creates a new S3FileCheckpointer instance.
   *
   * @param path     of Path
   * @param filename of Function
   */
  public S3FileCheckpointer( Path path, Function<String, String> filename )
    {
    this.path = path;
    this.filename = filename;
    }

  /**
   * Method getLastKey reads the first line of the checkpoint file for the given bucket.
   *
   * @param bucketName of String
   * @return the first line of the checkpoint file, or {@code null} if the file does not exist or is empty
   * @throws UncheckedIOException if the checkpoint file cannot be read
   */
  @Override
  public String getLastKey( String bucketName )
    {
    Path input = getPathFor( bucketName );

    if( !Files.exists( input ) )
      return null;

    // Files.lines keeps the file open until the stream is closed, so it must be scoped
    try( Stream<String> lines = Files.lines( input ) )
      {
      return lines.findFirst().orElse( null );
      }
    catch( IOException exception )
      {
      throw new UncheckedIOException( exception );
      }
    }

  /**
   * Method getPathFor resolves the checkpoint file for the given bucket against the checkpoint directory.
   *
   * @param bucketName of String
   * @return Path
   */
  private Path getPathFor( String bucketName )
    {
    return path.resolve( filename.apply( bucketName ) );
    }

  /**
   * Method setLastKey records the given key for the given bucket in memory, replacing any key
   * previously recorded for that bucket. Nothing is written to disk until {@link #commit()} is called.
   *
   * @param bucketName of String
   * @param key        of String
   */
  @Override
  public void setLastKey( String bucketName, String key )
    {
    seenKeys.put( bucketName, key );
    }

  /**
   * Method commit writes every pending bucket/key pair to its checkpoint file, creating parent
   * directories as needed, and then clears the pending entries.
   * <p>
   * If a write fails, the pending entries are not cleared.
   *
   * @throws UncheckedIOException if a directory cannot be created or a checkpoint file cannot be written
   */
  @Override
  public void commit()
    {
    for( Map.Entry<String, String> entry : seenKeys.entrySet() )
      {
      try
        {
        Path bucketPath = getPathFor( entry.getKey() );
        Path parent = bucketPath.getParent();

        // a single-element relative path has no parent, in which case there is nothing to create
        if( parent != null )
          Files.createDirectories( parent );

        Files.write( bucketPath, Collections.singleton( entry.getValue() ) );
        }
      catch( IOException exception )
        {
        throw new UncheckedIOException( exception );
        }
      }

    seenKeys.clear();
    }
  }