public class FileSystemTracer
extends org.apache.hadoop.fs.FileSystem
A file system that traces file system operations by comparing the output of calls made by LakeFSFileSystem and by S3AFileSystem.
How to use the FileSystemTracer:
To use the FileSystemTracer, you should set the value of the Hadoop configuration 'fs.lakefs.impl' to FileSystemTracer.
This configures the FileSystemTracer to be the file system that handles paths with the lakefs scheme, i.e., paths with
the lakefs:// prefix.
How does the FileSystemTracer work:
The FileSystemTracer holds instances of LakeFSFileSystem and S3AFileSystem. On a file system operation,
the FileSystemTracer invokes the operation on both file systems, logs the output of both calls, and returns the result
of one of the file systems based on a configuration (S3AFileSystem output by default).
Configuration:
- fs.lakefs.tracer.working_dir - the S3 location in which the tracer can operate. This should be an S3 bucket name or
an absolute path of a directory in a bucket.
Optional -
- fs.lakefs.tracer.use_lakefs_output - tells the tracer whether it should return the response coming from the lakeFS file
system or the S3A file system's response. By default it is set to false, and the tracer returns S3A's response.
Assumptions:
- The content of lakefs://repository/branch/ and s3a://${fs.lakefs.tracer.working_dir}/ should be identical.
- The S3 credentials available for Spark allow access to fs.lakefs.tracer.working_dir on S3.

| Constructor and Description |
|---|
| FileSystemTracer() |
| Modifier and Type | Method and Description |
|---|---|
| org.apache.hadoop.fs.FSDataOutputStream | append(org.apache.hadoop.fs.Path f, int bufferSize, org.apache.hadoop.util.Progressable progress) |
| org.apache.hadoop.fs.FSDataOutputStream | create(org.apache.hadoop.fs.Path f, org.apache.hadoop.fs.permission.FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, org.apache.hadoop.util.Progressable progress) |
| boolean | delete(org.apache.hadoop.fs.Path f, boolean recursive) |
| org.apache.hadoop.fs.FileStatus | getFileStatus(org.apache.hadoop.fs.Path f) |
| URI | getUri() |
| org.apache.hadoop.fs.Path | getWorkingDirectory() |
| void | initialize(URI name, org.apache.hadoop.conf.Configuration conf) |
| org.apache.hadoop.fs.FileStatus[] | listStatus(org.apache.hadoop.fs.Path f) |
| org.apache.hadoop.fs.Path | makeQualified(org.apache.hadoop.fs.Path path) |
| boolean | mkdirs(org.apache.hadoop.fs.Path f, org.apache.hadoop.fs.permission.FsPermission permission) |
| org.apache.hadoop.fs.FSDataInputStream | open(org.apache.hadoop.fs.Path f, int bufferSize) |
| boolean | rename(org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dst) |
| void | setWorkingDirectory(org.apache.hadoop.fs.Path newDir) |

access, addDelegationTokens, append, append, areSymlinksEnabled, cancelDeleteOnExit, canonicalizeUri, checkPath, clearStatistics, close, closeAll, closeAllForUGI, completeLocalOutput, concat, copyFromLocalFile, copyFromLocalFile, copyFromLocalFile, copyFromLocalFile, copyToLocalFile, copyToLocalFile, copyToLocalFile, create, create, create, create, create, create, create, create, create, create, create, create, createNewFile, createNonRecursive, createNonRecursive, createNonRecursive, createSnapshot, createSnapshot, createSymlink, delete, deleteOnExit, deleteSnapshot, enableSymlinks, exists, fixRelativePart, get, get, get, getAclStatus, getAllStatistics, getBlockSize, getCanonicalServiceName, getCanonicalUri, getChildFileSystems, getContentSummary, getDefaultBlockSize, getDefaultBlockSize, getDefaultPort, getDefaultReplication, getDefaultReplication, getDefaultUri, getDelegationToken, getFileBlockLocations, getFileBlockLocations, getFileChecksum, getFileChecksum, getFileLinkStatus, getFileSystemClass, getFSofPath, getHomeDirectory, getInitialWorkingDirectory, getLength, getLinkTarget, getLocal, getName, getNamed, getReplication, getScheme, getServerDefaults, getServerDefaults, getStatistics, getStatistics, getStatus, getStatus, getUsed, getXAttr, getXAttrs, getXAttrs, globStatus, globStatus, isDirectory, isFile, listCorruptFileBlocks, listFiles, listLocatedStatus, listLocatedStatus, listStatus, listStatus, listStatus, listStatusIterator, listXAttrs, mkdirs, mkdirs, modifyAclEntries, moveFromLocalFile, moveFromLocalFile, moveToLocalFile, newInstance, newInstance, newInstance, newInstanceLocal, open, primitiveCreate, primitiveMkdir, primitiveMkdir, printStatistics, processDeleteOnExit, removeAcl, removeAclEntries, removeDefaultAcl, removeXAttr, rename, renameSnapshot, resolveLink, resolvePath, setAcl, setDefaultUri, setDefaultUri, setOwner, setPermission, setReplication, setTimes, setVerifyChecksum, setWriteChecksum, setXAttr, setXAttr, startLocalOutput, 
supportsSymlinks, truncate

public void initialize(URI name, org.apache.hadoop.conf.Configuration conf) throws IOException
initialize in class org.apache.hadoop.fs.FileSystem
Throws: IOException

public URI getUri()
getUri in class org.apache.hadoop.fs.FileSystem

public org.apache.hadoop.fs.Path makeQualified(org.apache.hadoop.fs.Path path)
makeQualified in class org.apache.hadoop.fs.FileSystem

public org.apache.hadoop.fs.FSDataInputStream open(org.apache.hadoop.fs.Path f,
int bufferSize)
throws IOException
open in class org.apache.hadoop.fs.FileSystem
Throws: IOException

public org.apache.hadoop.fs.FSDataOutputStream create(org.apache.hadoop.fs.Path f,
org.apache.hadoop.fs.permission.FsPermission permission,
boolean overwrite,
int bufferSize,
short replication,
long blockSize,
org.apache.hadoop.util.Progressable progress)
throws IOException
create in class org.apache.hadoop.fs.FileSystem
Throws: IOException

public org.apache.hadoop.fs.FSDataOutputStream append(org.apache.hadoop.fs.Path f,
int bufferSize,
org.apache.hadoop.util.Progressable progress)
throws IOException
append in class org.apache.hadoop.fs.FileSystem
Throws: IOException

public boolean rename(org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dst)
throws IOException
rename in class org.apache.hadoop.fs.FileSystem
Throws: IOException

public boolean delete(org.apache.hadoop.fs.Path f,
boolean recursive)
throws IOException
delete in class org.apache.hadoop.fs.FileSystem
Throws: IOException

public org.apache.hadoop.fs.FileStatus[] listStatus(org.apache.hadoop.fs.Path f)
throws FileNotFoundException,
IOException
listStatus in class org.apache.hadoop.fs.FileSystem
Throws: FileNotFoundException, IOException

public void setWorkingDirectory(org.apache.hadoop.fs.Path newDir)
setWorkingDirectory in class org.apache.hadoop.fs.FileSystem

public org.apache.hadoop.fs.Path getWorkingDirectory()
getWorkingDirectory in class org.apache.hadoop.fs.FileSystem

public boolean mkdirs(org.apache.hadoop.fs.Path f,
org.apache.hadoop.fs.permission.FsPermission permission)
throws IOException
mkdirs in class org.apache.hadoop.fs.FileSystem
Throws: IOException

public org.apache.hadoop.fs.FileStatus getFileStatus(org.apache.hadoop.fs.Path f)
throws IOException
getFileStatus in class org.apache.hadoop.fs.FileSystem
Throws: IOException

Copyright © 2023. All rights reserved.