001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.fs;
019
020 import java.io.FileNotFoundException;
021 import java.io.IOException;
022 import java.io.InputStream;
023 import java.io.OutputStream;
024 import java.net.URI;
025 import java.security.PrivilegedExceptionAction;
026 import java.util.ArrayList;
027 import java.util.Arrays;
028 import java.util.EnumSet;
029 import java.util.HashSet;
030 import java.util.IdentityHashMap;
031 import java.util.List;
032 import java.util.Map;
033 import java.util.Set;
034 import java.util.Stack;
035 import java.util.TreeSet;
036 import java.util.Map.Entry;
037
038 import org.apache.commons.logging.Log;
039 import org.apache.commons.logging.LogFactory;
040 import org.apache.hadoop.HadoopIllegalArgumentException;
041 import org.apache.hadoop.classification.InterfaceAudience;
042 import org.apache.hadoop.classification.InterfaceStability;
043 import org.apache.hadoop.conf.Configuration;
044 import org.apache.hadoop.fs.FileSystem.Statistics;
045 import org.apache.hadoop.fs.Options.CreateOpts;
046 import org.apache.hadoop.fs.permission.FsPermission;
047 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
048 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT;
049 import org.apache.hadoop.io.IOUtils;
050 import org.apache.hadoop.ipc.RpcClientException;
051 import org.apache.hadoop.ipc.RpcServerException;
052 import org.apache.hadoop.ipc.UnexpectedServerException;
053 import org.apache.hadoop.fs.InvalidPathException;
054 import org.apache.hadoop.security.AccessControlException;
055 import org.apache.hadoop.security.UserGroupInformation;
056 import org.apache.hadoop.security.token.Token;
057 import org.apache.hadoop.util.ShutdownHookManager;
058
059 /**
060 * The FileContext class provides an interface to the application writer for
061 * using the Hadoop file system.
062 * It provides a set of methods for the usual operation: create, open,
063 * list, etc
064 *
065 * <p>
066 * <b> *** Path Names *** </b>
067 * <p>
068 *
069 * The Hadoop file system supports a URI name space and URI names.
070 * It offers a forest of file systems that can be referenced using fully
071 * qualified URIs.
072 * Two common Hadoop file systems implementations are
073 * <ul>
074 * <li> the local file system: file:///path
075 * <li> the hdfs file system hdfs://nnAddress:nnPort/path
076 * </ul>
077 *
078 * While URI names are very flexible, it requires knowing the name or address
079 * of the server. For convenience one often wants to access the default system
080 * in one's environment without knowing its name/address. This has an
081 * additional benefit that it allows one to change one's default fs
082 * (e.g. admin moves application from cluster1 to cluster2).
083 * <p>
084 *
085 * To facilitate this, Hadoop supports a notion of a default file system.
086 * The user can set his default file system, although this is
087 * typically set up for you in your environment via your default config.
088 * A default file system implies a default scheme and authority; slash-relative
089 * names (such as /for/bar) are resolved relative to that default FS.
090 * Similarly a user can also have working-directory-relative names (i.e. names
091 * not starting with a slash). While the working directory is generally in the
092 * same default FS, the wd can be in a different FS.
093 * <p>
094 * Hence Hadoop path names can be one of:
095 * <ul>
096 * <li> fully qualified URI: scheme://authority/path
097 * <li> slash relative names: /path relative to the default file system
098 * <li> wd-relative names: path relative to the working dir
099 * </ul>
100 * Relative paths with scheme (scheme:foo/bar) are illegal.
101 *
102 * <p>
103 * <b>****The Role of the FileContext and configuration defaults****</b>
104 * <p>
105 * The FileContext provides file namespace context for resolving file names;
* it also contains the umask for permissions. In that sense it is like the
107 * per-process file-related state in Unix system.
108 * These two properties
109 * <ul>
* <li> default file system (i.e. your slash)
111 * <li> umask
112 * </ul>
* in general, are obtained from the default configuration file
* in your environment (see {@link Configuration}).
115 *
116 * No other configuration parameters are obtained from the default config as
117 * far as the file context layer is concerned. All file system instances
118 * (i.e. deployments of file systems) have default properties; we call these
* server side (SS) defaults. Operations like create allow one to select many
120 * properties: either pass them in as explicit parameters or use
121 * the SS properties.
122 * <p>
123 * The file system related SS defaults are
124 * <ul>
125 * <li> the home directory (default is "/user/userName")
126 * <li> the initial wd (only for local fs)
127 * <li> replication factor
128 * <li> block size
129 * <li> buffer size
130 * <li> bytesPerChecksum (if used).
131 * </ul>
132 *
133 * <p>
134 * <b> *** Usage Model for the FileContext class *** </b>
135 * <p>
136 * Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
137 * Unspecified values come from core-defaults.xml in the release jar.
138 * <ul>
139 * <li> myFContext = FileContext.getFileContext(); // uses the default config
140 * // which has your default FS
141 * <li> myFContext.create(path, ...);
142 * <li> myFContext.setWorkingDir(path)
143 * <li> myFContext.open (path, ...);
144 * </ul>
145 * Example 2: Get a FileContext with a specific URI as the default FS
146 * <ul>
147 * <li> myFContext = FileContext.getFileContext(URI)
148 * <li> myFContext.create(path, ...);
149 * ...
150 * </ul>
151 * Example 3: FileContext with local file system as the default
152 * <ul>
153 * <li> myFContext = FileContext.getLocalFSFileContext()
154 * <li> myFContext.create(path, ...);
155 * <li> ...
156 * </ul>
157 * Example 4: Use a specific config, ignoring $HADOOP_CONFIG
* Generally you should not need to use a specific config unless you are doing
* something unusual, e.g. you were handed a config by someone else.
159 * <ul>
160 * <li> configX = someConfigSomeOnePassedToYou.
161 * <li> myFContext = getFileContext(configX); // configX is not changed,
162 * // is passed down
163 * <li> myFContext.create(path, ...);
164 * <li>...
165 * </ul>
166 *
167 */
168
169 @InterfaceAudience.Public
170 @InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */
171 public final class FileContext {
172
// Logger for this class, obtained from commons-logging's LogFactory.
public static final Log LOG = LogFactory.getLog(FileContext.class);
// Value of FsPermission.getDefault(), captured once when the class is
// initialized.
public static final FsPermission DEFAULT_PERM = FsPermission.getDefault();

/**
 * Priority of the FileContext shutdown hook.
 */
public static final int SHUTDOWN_HOOK_PRIORITY = 20;

/**
 * Paths that should be deleted on JVM shutdown, keyed by the FileContext
 * that is later used to delete them. The map is an IdentityHashMap, so two
 * distinct FileContext instances never share an entry.
 * processDeleteOnExit() synchronizes on this map while it drains it.
 */
static final Map<FileContext, Set<Path>> DELETE_ON_EXIT =
    new IdentityHashMap<FileContext, Set<Path>>();

/** JVM shutdown hook thread. */
// NOTE(review): FileContextFinalizer is declared outside this part of the
// file; presumably it calls processDeleteOnExit() - confirm.
static final FileContextFinalizer FINALIZER =
    new FileContextFinalizer();
190
191 private static final PathFilter DEFAULT_FILTER = new PathFilter() {
192 public boolean accept(final Path file) {
193 return true;
194 }
195 };
196
/**
 * The FileContext is defined by:
 *  1) defaultFS (slash) - the file system tried first when looking up the
 *     file system of a path (see getFSofPath)
 *  2) wd - the working directory that relative names are prefixed with
 *     (see fixRelativePart)
 *  3) umask - applied to the requested permission in create() and mkdir()
 */
private final AbstractFileSystem defaultFS; //default FS for this FileContext.
private Path workingDir;          // Fully qualified
private FsPermission umask;       // mutable through setUMask()
// Configuration handed to getAbstractFileSystem() when a path lives on a
// file system other than defaultFS.
private final Configuration conf;
// User obtained from UserGroupInformation.getCurrentUser() in the
// constructor; file system lookups are performed as this user.
private final UserGroupInformation ugi;
208
/**
 * Builds a FileContext around the given default file system.
 *
 * @param defFs file system that becomes the default FS of this context
 * @param theUmask initial umask; when null the umask is read from
 *          <code>aConf</code> instead
 * @param aConf configuration kept for later file system lookups
 * @throws RuntimeException if the current user cannot be determined
 */
private FileContext(final AbstractFileSystem defFs,
    final FsPermission theUmask, final Configuration aConf) {
  defaultFS = defFs;
  // Honor the umask supplied by the caller. Previously the parameter was
  // ignored and the value was always re-read from the configuration; the
  // configuration is now only the fallback for a null argument.
  umask = (theUmask != null) ? theUmask : FsPermission.getUMask(aConf);
  conf = aConf;
  try {
    ugi = UserGroupInformation.getCurrentUser();
  } catch (IOException e) {
    LOG.error("Exception in getCurrentUser: ",e);
    throw new RuntimeException("Failed to get the current user " +
        "while creating a FileContext", e);
  }
  /*
   * Init the wd.
   * WorkingDir is implemented at the FileContext layer
   * NOT at the AbstractFileSystem layer.
   * If the DefaultFS, such as localFilesystem has a notion of
   * builtin WD, we use that as the initial WD.
   * Otherwise the WD is initialized to the home directory.
   */
  workingDir = defaultFS.getInitialWorkingDirectory();
  if (workingDir == null) {
    workingDir = defaultFS.getHomeDirectory();
  }
  util = new Util(); // for the inner class
}
235
236 /*
237 * Remove relative part - return "absolute":
238 * If input is relative path ("foo/bar") add wd: ie "/<workingDir>/foo/bar"
239 * A fully qualified uri ("hdfs://nn:p/foo/bar") or a slash-relative path
240 * ("/foo/bar") are returned unchanged.
241 *
242 * Applications that use FileContext should use #makeQualified() since
243 * they really want a fully qualified URI.
244 * Hence this method is not called makeAbsolute() and
245 * has been deliberately declared private.
246 */
247 private Path fixRelativePart(Path p) {
248 if (p.isUriPathAbsolute()) {
249 return p;
250 } else {
251 return new Path(workingDir, p);
252 }
253 }
254
255 /**
256 * Delete all the paths that were marked as delete-on-exit.
257 */
258 static void processDeleteOnExit() {
259 synchronized (DELETE_ON_EXIT) {
260 Set<Entry<FileContext, Set<Path>>> set = DELETE_ON_EXIT.entrySet();
261 for (Entry<FileContext, Set<Path>> entry : set) {
262 FileContext fc = entry.getKey();
263 Set<Path> paths = entry.getValue();
264 for (Path path : paths) {
265 try {
266 fc.delete(path, true);
267 } catch (IOException e) {
268 LOG.warn("Ignoring failure to deleteOnExit for path " + path);
269 }
270 }
271 }
272 DELETE_ON_EXIT.clear();
273 }
274 }
275
276 /**
277 * Pathnames with scheme and relative path are illegal.
278 * @param path to be checked
279 */
280 private static void checkNotSchemeWithRelative(final Path path) {
281 if (path.toUri().isAbsolute() && !path.isUriPathAbsolute()) {
282 throw new HadoopIllegalArgumentException(
283 "Unsupported name: has scheme but relative path-part");
284 }
285 }
286
287 /**
288 * Get the file system of supplied path.
289 *
290 * @param absOrFqPath - absolute or fully qualified path
291 * @return the file system of the path
292 *
293 * @throws UnsupportedFileSystemException If the file system for
294 * <code>absOrFqPath</code> is not supported.
295 * @throws IOExcepton If the file system for <code>absOrFqPath</code> could
296 * not be instantiated.
297 */
298 private AbstractFileSystem getFSofPath(final Path absOrFqPath)
299 throws UnsupportedFileSystemException, IOException {
300 checkNotSchemeWithRelative(absOrFqPath);
301 if (!absOrFqPath.isAbsolute() && absOrFqPath.toUri().getScheme() == null) {
302 throw new HadoopIllegalArgumentException(
303 "FileContext Bug: path is relative");
304 }
305
306 try {
307 // Is it the default FS for this FileContext?
308 defaultFS.checkPath(absOrFqPath);
309 return defaultFS;
310 } catch (Exception e) { // it is different FileSystem
311 return getAbstractFileSystem(ugi, absOrFqPath.toUri(), conf);
312 }
313 }
314
315 private static AbstractFileSystem getAbstractFileSystem(
316 UserGroupInformation user, final URI uri, final Configuration conf)
317 throws UnsupportedFileSystemException, IOException {
318 try {
319 return user.doAs(new PrivilegedExceptionAction<AbstractFileSystem>() {
320 public AbstractFileSystem run() throws UnsupportedFileSystemException {
321 return AbstractFileSystem.get(uri, conf);
322 }
323 });
324 } catch (InterruptedException ex) {
325 LOG.error(ex);
326 throw new IOException("Failed to get the AbstractFileSystem for path: "
327 + uri, ex);
328 }
329 }
330
/**
* Static factory methods for getting FileContexts
* that take an AbstractFileSystem as input. Intended mainly for testing.
*/
335
336 /**
337 * Create a FileContext with specified FS as default using the specified
338 * config.
339 *
340 * @param defFS
341 * @param aConf
342 * @return new FileContext with specifed FS as default.
343 */
344 public static FileContext getFileContext(final AbstractFileSystem defFS,
345 final Configuration aConf) {
346 return new FileContext(defFS, FsPermission.getUMask(aConf), aConf);
347 }
348
349 /**
350 * Create a FileContext for specified file system using the default config.
351 *
352 * @param defaultFS
353 * @return a FileContext with the specified AbstractFileSystem
354 * as the default FS.
355 */
356 protected static FileContext getFileContext(
357 final AbstractFileSystem defaultFS) {
358 return getFileContext(defaultFS, new Configuration());
359 }
360
361 /**
362 * Static Factory methods for getting a FileContext.
363 * Note new file contexts are created for each call.
364 * The only singleton is the local FS context using the default config.
365 *
366 * Methods that use the default config: the default config read from the
367 * $HADOOP_CONFIG/core.xml,
368 * Unspecified key-values for config are defaulted from core-defaults.xml
369 * in the release jar.
370 *
371 * The keys relevant to the FileContext layer are extracted at time of
* construction. Changes to the config after the call are ignored
373 * by the FileContext layer.
374 * The conf is passed to lower layers like AbstractFileSystem and HDFS which
375 * pick up their own config variables.
376 */
377
378 /**
379 * Create a FileContext using the default config read from the
380 * $HADOOP_CONFIG/core.xml, Unspecified key-values for config are defaulted
381 * from core-defaults.xml in the release jar.
382 *
383 * @throws UnsupportedFileSystemException If the file system from the default
384 * configuration is not supported
385 */
386 public static FileContext getFileContext()
387 throws UnsupportedFileSystemException {
388 return getFileContext(new Configuration());
389 }
390
391 /**
392 * @return a FileContext for the local file system using the default config.
393 * @throws UnsupportedFileSystemException If the file system for
394 * {@link FsConstants#LOCAL_FS_URI} is not supported.
395 */
396 public static FileContext getLocalFSFileContext()
397 throws UnsupportedFileSystemException {
398 return getFileContext(FsConstants.LOCAL_FS_URI);
399 }
400
401 /**
402 * Create a FileContext for specified URI using the default config.
403 *
404 * @param defaultFsUri
405 * @return a FileContext with the specified URI as the default FS.
406 *
407 * @throws UnsupportedFileSystemException If the file system for
408 * <code>defaultFsUri</code> is not supported
409 */
410 public static FileContext getFileContext(final URI defaultFsUri)
411 throws UnsupportedFileSystemException {
412 return getFileContext(defaultFsUri, new Configuration());
413 }
414
415 /**
416 * Create a FileContext for specified default URI using the specified config.
417 *
418 * @param defaultFsUri
419 * @param aConf
420 * @return new FileContext for specified uri
421 * @throws UnsupportedFileSystemException If the file system with specified is
422 * not supported
423 * @throws RuntimeException If the file system specified is supported but
424 * could not be instantiated, or if login fails.
425 */
426 public static FileContext getFileContext(final URI defaultFsUri,
427 final Configuration aConf) throws UnsupportedFileSystemException {
428 UserGroupInformation currentUser = null;
429 AbstractFileSystem defaultAfs = null;
430 try {
431 currentUser = UserGroupInformation.getCurrentUser();
432 defaultAfs = getAbstractFileSystem(currentUser, defaultFsUri, aConf);
433 } catch (UnsupportedFileSystemException ex) {
434 throw ex;
435 } catch (IOException ex) {
436 LOG.error(ex);
437 throw new RuntimeException(ex);
438 }
439 return getFileContext(defaultAfs, aConf);
440 }
441
442 /**
443 * Create a FileContext using the passed config. Generally it is better to use
444 * {@link #getFileContext(URI, Configuration)} instead of this one.
445 *
446 *
447 * @param aConf
448 * @return new FileContext
449 * @throws UnsupportedFileSystemException If file system in the config
450 * is not supported
451 */
452 public static FileContext getFileContext(final Configuration aConf)
453 throws UnsupportedFileSystemException {
454 return getFileContext(
455 URI.create(aConf.get(FS_DEFAULT_NAME_KEY, FS_DEFAULT_NAME_DEFAULT)),
456 aConf);
457 }
458
459 /**
460 * @param aConf - from which the FileContext is configured
461 * @return a FileContext for the local file system using the specified config.
462 *
463 * @throws UnsupportedFileSystemException If default file system in the config
464 * is not supported
465 *
466 */
467 public static FileContext getLocalFSFileContext(final Configuration aConf)
468 throws UnsupportedFileSystemException {
469 return getFileContext(FsConstants.LOCAL_FS_URI, aConf);
470 }
471
472 /* This method is needed for tests. */
473 @InterfaceAudience.Private
474 @InterfaceStability.Unstable /* return type will change to AFS once
475 HADOOP-6223 is completed */
476 public AbstractFileSystem getDefaultFileSystem() {
477 return defaultFS;
478 }
479
480 /**
481 * Set the working directory for wd-relative names (such a "foo/bar"). Working
482 * directory feature is provided by simply prefixing relative names with the
483 * working dir. Note this is different from Unix where the wd is actually set
484 * to the inode. Hence setWorkingDir does not follow symlinks etc. This works
485 * better in a distributed environment that has multiple independent roots.
486 * {@link #getWorkingDirectory()} should return what setWorkingDir() set.
487 *
488 * @param newWDir new working directory
489 * @throws IOException
490 * <br>
491 * NewWdir can be one of:
492 * <ul>
493 * <li>relative path: "foo/bar";</li>
494 * <li>absolute without scheme: "/foo/bar"</li>
495 * <li>fully qualified with scheme: "xx://auth/foo/bar"</li>
496 * </ul>
497 * <br>
498 * Illegal WDs:
499 * <ul>
500 * <li>relative with scheme: "xx:foo/bar"</li>
501 * <li>non existent directory</li>
502 * </ul>
503 */
504 public void setWorkingDirectory(final Path newWDir) throws IOException {
505 checkNotSchemeWithRelative(newWDir);
506 /* wd is stored as a fully qualified path. We check if the given
507 * path is not relative first since resolve requires and returns
508 * an absolute path.
509 */
510 final Path newWorkingDir = new Path(workingDir, newWDir);
511 FileStatus status = getFileStatus(newWorkingDir);
512 if (status.isFile()) {
513 throw new FileNotFoundException("Cannot setWD to a file");
514 }
515 workingDir = newWorkingDir;
516 }
517
518 /**
519 * Gets the working directory for wd-relative names (such a "foo/bar").
520 */
521 public Path getWorkingDirectory() {
522 return workingDir;
523 }
524
525 /**
526 * Gets the ugi in the file-context
527 * @return UserGroupInformation
528 */
529 public UserGroupInformation getUgi() {
530 return ugi;
531 }
532
533 /**
534 * Return the current user's home directory in this file system.
535 * The default implementation returns "/user/$USER/".
536 * @return the home directory
537 */
538 public Path getHomeDirectory() {
539 return defaultFS.getHomeDirectory();
540 }
541
542 /**
543 *
544 * @return the umask of this FileContext
545 */
546 public FsPermission getUMask() {
547 return umask;
548 }
549
550 /**
551 * Set umask to the supplied parameter.
552 * @param newUmask the new umask
553 */
554 public void setUMask(final FsPermission newUmask) {
555 umask = newUmask;
556 }
557
558
559 /**
560 * Resolve the path following any symlinks or mount points
561 * @param f to be resolved
562 * @return fully qualified resolved path
563 *
564 * @throws FileNotFoundException If <code>f</code> does not exist
565 * @throws AccessControlException if access denied
566 * @throws IOException If an IO Error occurred
567 *
568 * Exceptions applicable to file systems accessed over RPC:
569 * @throws RpcClientException If an exception occurred in the RPC client
570 * @throws RpcServerException If an exception occurred in the RPC server
571 * @throws UnexpectedServerException If server implementation throws
572 * undeclared exception to RPC server
573 *
574 * RuntimeExceptions:
575 * @throws InvalidPathException If path <code>f</code> is not valid
576 */
577 public Path resolvePath(final Path f) throws FileNotFoundException,
578 UnresolvedLinkException, AccessControlException, IOException {
579 return resolve(f);
580 }
581
582 /**
583 * Make the path fully qualified if it is isn't.
584 * A Fully-qualified path has scheme and authority specified and an absolute
585 * path.
586 * Use the default file system and working dir in this FileContext to qualify.
587 * @param path
588 * @return qualified path
589 */
590 public Path makeQualified(final Path path) {
591 return path.makeQualified(defaultFS.getUri(), getWorkingDirectory());
592 }
593
594 /**
595 * Create or overwrite file on indicated path and returns an output stream for
596 * writing into the file.
597 *
598 * @param f the file name to open
599 * @param createFlag gives the semantics of create; see {@link CreateFlag}
600 * @param opts file creation options; see {@link Options.CreateOpts}.
601 * <ul>
602 * <li>Progress - to report progress on the operation - default null
603 * <li>Permission - umask is applied against permisssion: default is
604 * FsPermissions:getDefault()
605 *
606 * <li>CreateParent - create missing parent path; default is to not
607 * to create parents
608 * <li>The defaults for the following are SS defaults of the file
609 * server implementing the target path. Not all parameters make sense
610 * for all kinds of file system - eg. localFS ignores Blocksize,
611 * replication, checksum
612 * <ul>
613 * <li>BufferSize - buffersize used in FSDataOutputStream
614 * <li>Blocksize - block size for file blocks
615 * <li>ReplicationFactor - replication for blocks
616 * <li>BytesPerChecksum - bytes per checksum
617 * </ul>
618 * </ul>
619 *
620 * @return {@link FSDataOutputStream} for created file
621 *
622 * @throws AccessControlException If access is denied
623 * @throws FileAlreadyExistsException If file <code>f</code> already exists
624 * @throws FileNotFoundException If parent of <code>f</code> does not exist
625 * and <code>createParent</code> is false
626 * @throws ParentNotDirectoryException If parent of <code>f</code> is not a
627 * directory.
628 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
629 * not supported
630 * @throws IOException If an I/O error occurred
631 *
632 * Exceptions applicable to file systems accessed over RPC:
633 * @throws RpcClientException If an exception occurred in the RPC client
634 * @throws RpcServerException If an exception occurred in the RPC server
635 * @throws UnexpectedServerException If server implementation throws
636 * undeclared exception to RPC server
637 *
638 * RuntimeExceptions:
639 * @throws InvalidPathException If path <code>f</code> is not valid
640 */
641 public FSDataOutputStream create(final Path f,
642 final EnumSet<CreateFlag> createFlag, Options.CreateOpts... opts)
643 throws AccessControlException, FileAlreadyExistsException,
644 FileNotFoundException, ParentNotDirectoryException,
645 UnsupportedFileSystemException, IOException {
646 Path absF = fixRelativePart(f);
647
648 // If one of the options is a permission, extract it & apply umask
649 // If not, add a default Perms and apply umask;
650 // AbstractFileSystem#create
651
652 CreateOpts.Perms permOpt =
653 (CreateOpts.Perms) CreateOpts.getOpt(CreateOpts.Perms.class, opts);
654 FsPermission permission = (permOpt != null) ? permOpt.getValue() :
655 FsPermission.getDefault();
656 permission = permission.applyUMask(umask);
657
658 final CreateOpts[] updatedOpts =
659 CreateOpts.setOpt(CreateOpts.perms(permission), opts);
660 return new FSLinkResolver<FSDataOutputStream>() {
661 public FSDataOutputStream next(final AbstractFileSystem fs, final Path p)
662 throws IOException {
663 return fs.create(p, createFlag, updatedOpts);
664 }
665 }.resolve(this, absF);
666 }
667
668 /**
669 * Make(create) a directory and all the non-existent parents.
670 *
671 * @param dir - the dir to make
672 * @param permission - permissions is set permission&~umask
673 * @param createParent - if true then missing parent dirs are created if false
674 * then parent must exist
675 *
676 * @throws AccessControlException If access is denied
677 * @throws FileAlreadyExistsException If directory <code>dir</code> already
678 * exists
679 * @throws FileNotFoundException If parent of <code>dir</code> does not exist
680 * and <code>createParent</code> is false
681 * @throws ParentNotDirectoryException If parent of <code>dir</code> is not a
682 * directory
683 * @throws UnsupportedFileSystemException If file system for <code>dir</code>
684 * is not supported
685 * @throws IOException If an I/O error occurred
686 *
687 * Exceptions applicable to file systems accessed over RPC:
688 * @throws RpcClientException If an exception occurred in the RPC client
689 * @throws UnexpectedServerException If server implementation throws
690 * undeclared exception to RPC server
691 *
692 * RuntimeExceptions:
693 * @throws InvalidPathException If path <code>dir</code> is not valid
694 */
695 public void mkdir(final Path dir, final FsPermission permission,
696 final boolean createParent) throws AccessControlException,
697 FileAlreadyExistsException, FileNotFoundException,
698 ParentNotDirectoryException, UnsupportedFileSystemException,
699 IOException {
700 final Path absDir = fixRelativePart(dir);
701 final FsPermission absFerms = (permission == null ?
702 FsPermission.getDefault() : permission).applyUMask(umask);
703 new FSLinkResolver<Void>() {
704 public Void next(final AbstractFileSystem fs, final Path p)
705 throws IOException, UnresolvedLinkException {
706 fs.mkdir(p, absFerms, createParent);
707 return null;
708 }
709 }.resolve(this, absDir);
710 }
711
712 /**
713 * Delete a file.
714 * @param f the path to delete.
715 * @param recursive if path is a directory and set to
716 * true, the directory is deleted else throws an exception. In
717 * case of a file the recursive can be set to either true or false.
718 *
719 * @throws AccessControlException If access is denied
720 * @throws FileNotFoundException If <code>f</code> does not exist
721 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
722 * not supported
723 * @throws IOException If an I/O error occurred
724 *
725 * Exceptions applicable to file systems accessed over RPC:
726 * @throws RpcClientException If an exception occurred in the RPC client
727 * @throws RpcServerException If an exception occurred in the RPC server
728 * @throws UnexpectedServerException If server implementation throws
729 * undeclared exception to RPC server
730 *
731 * RuntimeExceptions:
732 * @throws InvalidPathException If path <code>f</code> is invalid
733 */
734 public boolean delete(final Path f, final boolean recursive)
735 throws AccessControlException, FileNotFoundException,
736 UnsupportedFileSystemException, IOException {
737 Path absF = fixRelativePart(f);
738 return new FSLinkResolver<Boolean>() {
739 public Boolean next(final AbstractFileSystem fs, final Path p)
740 throws IOException, UnresolvedLinkException {
741 return Boolean.valueOf(fs.delete(p, recursive));
742 }
743 }.resolve(this, absF);
744 }
745
746 /**
747 * Opens an FSDataInputStream at the indicated Path using
748 * default buffersize.
749 * @param f the file name to open
750 *
751 * @throws AccessControlException If access is denied
752 * @throws FileNotFoundException If file <code>f</code> does not exist
753 * @throws UnsupportedFileSystemException If file system for <code>f</code>
754 * is not supported
755 * @throws IOException If an I/O error occurred
756 *
757 * Exceptions applicable to file systems accessed over RPC:
758 * @throws RpcClientException If an exception occurred in the RPC client
759 * @throws RpcServerException If an exception occurred in the RPC server
760 * @throws UnexpectedServerException If server implementation throws
761 * undeclared exception to RPC server
762 */
763 public FSDataInputStream open(final Path f) throws AccessControlException,
764 FileNotFoundException, UnsupportedFileSystemException, IOException {
765 final Path absF = fixRelativePart(f);
766 return new FSLinkResolver<FSDataInputStream>() {
767 public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
768 throws IOException, UnresolvedLinkException {
769 return fs.open(p);
770 }
771 }.resolve(this, absF);
772 }
773
774 /**
775 * Opens an FSDataInputStream at the indicated Path.
776 *
777 * @param f the file name to open
778 * @param bufferSize the size of the buffer to be used.
779 *
780 * @throws AccessControlException If access is denied
781 * @throws FileNotFoundException If file <code>f</code> does not exist
782 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
783 * not supported
784 * @throws IOException If an I/O error occurred
785 *
786 * Exceptions applicable to file systems accessed over RPC:
787 * @throws RpcClientException If an exception occurred in the RPC client
788 * @throws RpcServerException If an exception occurred in the RPC server
789 * @throws UnexpectedServerException If server implementation throws
790 * undeclared exception to RPC server
791 */
792 public FSDataInputStream open(final Path f, final int bufferSize)
793 throws AccessControlException, FileNotFoundException,
794 UnsupportedFileSystemException, IOException {
795 final Path absF = fixRelativePart(f);
796 return new FSLinkResolver<FSDataInputStream>() {
797 public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
798 throws IOException, UnresolvedLinkException {
799 return fs.open(p, bufferSize);
800 }
801 }.resolve(this, absF);
802 }
803
804 /**
805 * Set replication for an existing file.
806 *
807 * @param f file name
808 * @param replication new replication
809 *
810 * @return true if successful
811 *
812 * @throws AccessControlException If access is denied
813 * @throws FileNotFoundException If file <code>f</code> does not exist
814 * @throws IOException If an I/O error occurred
815 *
816 * Exceptions applicable to file systems accessed over RPC:
817 * @throws RpcClientException If an exception occurred in the RPC client
818 * @throws RpcServerException If an exception occurred in the RPC server
819 * @throws UnexpectedServerException If server implementation throws
820 * undeclared exception to RPC server
821 */
822 public boolean setReplication(final Path f, final short replication)
823 throws AccessControlException, FileNotFoundException,
824 IOException {
825 final Path absF = fixRelativePart(f);
826 return new FSLinkResolver<Boolean>() {
827 public Boolean next(final AbstractFileSystem fs, final Path p)
828 throws IOException, UnresolvedLinkException {
829 return Boolean.valueOf(fs.setReplication(p, replication));
830 }
831 }.resolve(this, absF);
832 }
833
834 /**
835 * Renames Path src to Path dst
836 * <ul>
* <li>Fails if src is a file and dst is a directory.
839 * <li>Fails if src is a directory and dst is a file.
840 * <li>Fails if the parent of dst does not exist or is a file.
841 * </ul>
842 * <p>
843 * If OVERWRITE option is not passed as an argument, rename fails if the dst
844 * already exists.
845 * <p>
846 * If OVERWRITE option is passed as an argument, rename overwrites the dst if
847 * it is a file or an empty directory. Rename fails if dst is a non-empty
848 * directory.
849 * <p>
 * Note that atomicity of rename is dependent on the file system
 * implementation. Please refer to the file system documentation for details.
853 *
854 * @param src path to be renamed
855 * @param dst new path after rename
856 *
857 * @throws AccessControlException If access is denied
 * @throws FileAlreadyExistsException If <code>dst</code> already exists and
 *           <code>options</code> does not include
 *           {@link Options.Rename#OVERWRITE}.
861 * @throws FileNotFoundException If <code>src</code> does not exist
862 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not a
863 * directory
864 * @throws UnsupportedFileSystemException If file system for <code>src</code>
865 * and <code>dst</code> is not supported
866 * @throws IOException If an I/O error occurred
867 *
868 * Exceptions applicable to file systems accessed over RPC:
869 * @throws RpcClientException If an exception occurred in the RPC client
870 * @throws RpcServerException If an exception occurred in the RPC server
871 * @throws UnexpectedServerException If server implementation throws
872 * undeclared exception to RPC server
873 */
874 public void rename(final Path src, final Path dst,
875 final Options.Rename... options) throws AccessControlException,
876 FileAlreadyExistsException, FileNotFoundException,
877 ParentNotDirectoryException, UnsupportedFileSystemException,
878 IOException {
879 final Path absSrc = fixRelativePart(src);
880 final Path absDst = fixRelativePart(dst);
881 AbstractFileSystem srcFS = getFSofPath(absSrc);
882 AbstractFileSystem dstFS = getFSofPath(absDst);
883 if(!srcFS.getUri().equals(dstFS.getUri())) {
884 throw new IOException("Renames across AbstractFileSystems not supported");
885 }
886 try {
887 srcFS.rename(absSrc, absDst, options);
888 } catch (UnresolvedLinkException e) {
889 /* We do not know whether the source or the destination path
890 * was unresolved. Resolve the source path up until the final
891 * path component, then fully resolve the destination.
892 */
893 final Path source = resolveIntermediate(absSrc);
894 new FSLinkResolver<Void>() {
895 public Void next(final AbstractFileSystem fs, final Path p)
896 throws IOException, UnresolvedLinkException {
897 fs.rename(source, p, options);
898 return null;
899 }
900 }.resolve(this, absDst);
901 }
902 }
903
904 /**
905 * Set permission of a path.
 * @param f the path whose permission is to be set
907 * @param permission - the new absolute permission (umask is not applied)
908 *
909 * @throws AccessControlException If access is denied
910 * @throws FileNotFoundException If <code>f</code> does not exist
911 * @throws UnsupportedFileSystemException If file system for <code>f</code>
912 * is not supported
913 * @throws IOException If an I/O error occurred
914 *
915 * Exceptions applicable to file systems accessed over RPC:
916 * @throws RpcClientException If an exception occurred in the RPC client
917 * @throws RpcServerException If an exception occurred in the RPC server
918 * @throws UnexpectedServerException If server implementation throws
919 * undeclared exception to RPC server
920 */
921 public void setPermission(final Path f, final FsPermission permission)
922 throws AccessControlException, FileNotFoundException,
923 UnsupportedFileSystemException, IOException {
924 final Path absF = fixRelativePart(f);
925 new FSLinkResolver<Void>() {
926 public Void next(final AbstractFileSystem fs, final Path p)
927 throws IOException, UnresolvedLinkException {
928 fs.setPermission(p, permission);
929 return null;
930 }
931 }.resolve(this, absF);
932 }
933
934 /**
935 * Set owner of a path (i.e. a file or a directory). The parameters username
936 * and groupname cannot both be null.
937 *
938 * @param f The path
939 * @param username If it is null, the original username remains unchanged.
940 * @param groupname If it is null, the original groupname remains unchanged.
941 *
942 * @throws AccessControlException If access is denied
943 * @throws FileNotFoundException If <code>f</code> does not exist
944 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
945 * not supported
946 * @throws IOException If an I/O error occurred
947 *
948 * Exceptions applicable to file systems accessed over RPC:
949 * @throws RpcClientException If an exception occurred in the RPC client
950 * @throws RpcServerException If an exception occurred in the RPC server
951 * @throws UnexpectedServerException If server implementation throws
952 * undeclared exception to RPC server
953 *
954 * RuntimeExceptions:
955 * @throws HadoopIllegalArgumentException If <code>username</code> or
956 * <code>groupname</code> is invalid.
957 */
958 public void setOwner(final Path f, final String username,
959 final String groupname) throws AccessControlException,
960 UnsupportedFileSystemException, FileNotFoundException,
961 IOException {
962 if ((username == null) && (groupname == null)) {
963 throw new HadoopIllegalArgumentException(
964 "username and groupname cannot both be null");
965 }
966 final Path absF = fixRelativePart(f);
967 new FSLinkResolver<Void>() {
968 public Void next(final AbstractFileSystem fs, final Path p)
969 throws IOException, UnresolvedLinkException {
970 fs.setOwner(p, username, groupname);
971 return null;
972 }
973 }.resolve(this, absF);
974 }
975
976 /**
 * Set the modification time and access time of a file.
978 * @param f The path
979 * @param mtime Set the modification time of this file.
980 * The number of milliseconds since epoch (Jan 1, 1970).
981 * A value of -1 means that this call should not set modification time.
982 * @param atime Set the access time of this file.
983 * The number of milliseconds since Jan 1, 1970.
984 * A value of -1 means that this call should not set access time.
985 *
986 * @throws AccessControlException If access is denied
987 * @throws FileNotFoundException If <code>f</code> does not exist
988 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
989 * not supported
990 * @throws IOException If an I/O error occurred
991 *
992 * Exceptions applicable to file systems accessed over RPC:
993 * @throws RpcClientException If an exception occurred in the RPC client
994 * @throws RpcServerException If an exception occurred in the RPC server
995 * @throws UnexpectedServerException If server implementation throws
996 * undeclared exception to RPC server
997 */
998 public void setTimes(final Path f, final long mtime, final long atime)
999 throws AccessControlException, FileNotFoundException,
1000 UnsupportedFileSystemException, IOException {
1001 final Path absF = fixRelativePart(f);
1002 new FSLinkResolver<Void>() {
1003 public Void next(final AbstractFileSystem fs, final Path p)
1004 throws IOException, UnresolvedLinkException {
1005 fs.setTimes(p, mtime, atime);
1006 return null;
1007 }
1008 }.resolve(this, absF);
1009 }
1010
1011 /**
1012 * Get the checksum of a file.
1013 *
1014 * @param f file path
1015 *
1016 * @return The file checksum. The default return value is null,
1017 * which indicates that no checksum algorithm is implemented
1018 * in the corresponding FileSystem.
1019 *
1020 * @throws AccessControlException If access is denied
1021 * @throws FileNotFoundException If <code>f</code> does not exist
1022 * @throws IOException If an I/O error occurred
1023 *
1024 * Exceptions applicable to file systems accessed over RPC:
1025 * @throws RpcClientException If an exception occurred in the RPC client
1026 * @throws RpcServerException If an exception occurred in the RPC server
1027 * @throws UnexpectedServerException If server implementation throws
1028 * undeclared exception to RPC server
1029 */
1030 public FileChecksum getFileChecksum(final Path f)
1031 throws AccessControlException, FileNotFoundException,
1032 IOException {
1033 final Path absF = fixRelativePart(f);
1034 return new FSLinkResolver<FileChecksum>() {
1035 public FileChecksum next(final AbstractFileSystem fs, final Path p)
1036 throws IOException, UnresolvedLinkException {
1037 return fs.getFileChecksum(p);
1038 }
1039 }.resolve(this, absF);
1040 }
1041
1042 /**
1043 * Set the verify checksum flag for the file system denoted by the path.
1044 * This is only applicable if the
1045 * corresponding FileSystem supports checksum. By default doesn't do anything.
 * @param verifyChecksum the new value of the verify checksum flag
1047 * @param f set the verifyChecksum for the Filesystem containing this path
1048 *
1049 * @throws AccessControlException If access is denied
1050 * @throws FileNotFoundException If <code>f</code> does not exist
1051 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1052 * not supported
1053 * @throws IOException If an I/O error occurred
1054 *
1055 * Exceptions applicable to file systems accessed over RPC:
1056 * @throws RpcClientException If an exception occurred in the RPC client
1057 * @throws RpcServerException If an exception occurred in the RPC server
1058 * @throws UnexpectedServerException If server implementation throws
1059 * undeclared exception to RPC server
1060 */
1061 public void setVerifyChecksum(final boolean verifyChecksum, final Path f)
1062 throws AccessControlException, FileNotFoundException,
1063 UnsupportedFileSystemException, IOException {
1064 final Path absF = resolve(fixRelativePart(f));
1065 getFSofPath(absF).setVerifyChecksum(verifyChecksum);
1066 }
1067
1068 /**
1069 * Return a file status object that represents the path.
1070 * @param f The path we want information from
1071 *
1072 * @return a FileStatus object
1073 *
1074 * @throws AccessControlException If access is denied
1075 * @throws FileNotFoundException If <code>f</code> does not exist
1076 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1077 * not supported
1078 * @throws IOException If an I/O error occurred
1079 *
1080 * Exceptions applicable to file systems accessed over RPC:
1081 * @throws RpcClientException If an exception occurred in the RPC client
1082 * @throws RpcServerException If an exception occurred in the RPC server
1083 * @throws UnexpectedServerException If server implementation throws
1084 * undeclared exception to RPC server
1085 */
1086 public FileStatus getFileStatus(final Path f) throws AccessControlException,
1087 FileNotFoundException, UnsupportedFileSystemException, IOException {
1088 final Path absF = fixRelativePart(f);
1089 return new FSLinkResolver<FileStatus>() {
1090 public FileStatus next(final AbstractFileSystem fs, final Path p)
1091 throws IOException, UnresolvedLinkException {
1092 return fs.getFileStatus(p);
1093 }
1094 }.resolve(this, absF);
1095 }
1096
1097 /**
1098 * Return a fully qualified version of the given symlink target if it
1099 * has no scheme and authority. Partially and fully qualified paths
1100 * are returned unmodified.
1101 * @param pathFS The AbstractFileSystem of the path
1102 * @param pathWithLink Path that contains the symlink
1103 * @param target The symlink's absolute target
1104 * @return Fully qualified version of the target.
1105 */
1106 private Path qualifySymlinkTarget(final AbstractFileSystem pathFS,
1107 Path pathWithLink, Path target) {
1108 // NB: makeQualified uses the target's scheme and authority, if
1109 // specified, and the scheme and authority of pathFS, if not.
1110 final String scheme = target.toUri().getScheme();
1111 final String auth = target.toUri().getAuthority();
1112 return (scheme == null && auth == null)
1113 ? target.makeQualified(pathFS.getUri(), pathWithLink.getParent())
1114 : target;
1115 }
1116
1117 /**
1118 * Return a file status object that represents the path. If the path
1119 * refers to a symlink then the FileStatus of the symlink is returned.
1120 * The behavior is equivalent to #getFileStatus() if the underlying
1121 * file system does not support symbolic links.
1122 * @param f The path we want information from.
1123 * @return A FileStatus object
1124 *
1125 * @throws AccessControlException If access is denied
1126 * @throws FileNotFoundException If <code>f</code> does not exist
1127 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1128 * not supported
1129 * @throws IOException If an I/O error occurred
1130 */
1131 public FileStatus getFileLinkStatus(final Path f)
1132 throws AccessControlException, FileNotFoundException,
1133 UnsupportedFileSystemException, IOException {
1134 final Path absF = fixRelativePart(f);
1135 return new FSLinkResolver<FileStatus>() {
1136 public FileStatus next(final AbstractFileSystem fs, final Path p)
1137 throws IOException, UnresolvedLinkException {
1138 FileStatus fi = fs.getFileLinkStatus(p);
1139 if (fi.isSymlink()) {
1140 fi.setSymlink(qualifySymlinkTarget(fs, p, fi.getSymlink()));
1141 }
1142 return fi;
1143 }
1144 }.resolve(this, absF);
1145 }
1146
1147 /**
1148 * Returns the target of the given symbolic link as it was specified
1149 * when the link was created. Links in the path leading up to the
1150 * final path component are resolved transparently.
1151 *
1152 * @param f the path to return the target of
1153 * @return The un-interpreted target of the symbolic link.
1154 *
1155 * @throws AccessControlException If access is denied
1156 * @throws FileNotFoundException If path <code>f</code> does not exist
1157 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1158 * not supported
1159 * @throws IOException If the given path does not refer to a symlink
1160 * or an I/O error occurred
1161 */
1162 public Path getLinkTarget(final Path f) throws AccessControlException,
1163 FileNotFoundException, UnsupportedFileSystemException, IOException {
1164 final Path absF = fixRelativePart(f);
1165 return new FSLinkResolver<Path>() {
1166 public Path next(final AbstractFileSystem fs, final Path p)
1167 throws IOException, UnresolvedLinkException {
1168 FileStatus fi = fs.getFileLinkStatus(p);
1169 return fi.getSymlink();
1170 }
1171 }.resolve(this, absF);
1172 }
1173
1174 /**
1175 * Return blockLocation of the given file for the given offset and len.
1176 * For a nonexistent file or regions, null will be returned.
1177 *
1178 * This call is most helpful with DFS, where it returns
1179 * hostnames of machines that contain the given file.
1180 *
1181 * @param f - get blocklocations of this file
1182 * @param start position (byte offset)
1183 * @param len (in bytes)
1184 *
1185 * @return block locations for given file at specified offset of len
1186 *
1187 * @throws AccessControlException If access is denied
1188 * @throws FileNotFoundException If <code>f</code> does not exist
1189 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1190 * not supported
1191 * @throws IOException If an I/O error occurred
1192 *
1193 * Exceptions applicable to file systems accessed over RPC:
1194 * @throws RpcClientException If an exception occurred in the RPC client
1195 * @throws RpcServerException If an exception occurred in the RPC server
1196 * @throws UnexpectedServerException If server implementation throws
1197 * undeclared exception to RPC server
1198 *
1199 * RuntimeExceptions:
1200 * @throws InvalidPathException If path <code>f</code> is invalid
1201 */
1202 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
1203 @InterfaceStability.Evolving
1204 public BlockLocation[] getFileBlockLocations(final Path f, final long start,
1205 final long len) throws AccessControlException, FileNotFoundException,
1206 UnsupportedFileSystemException, IOException {
1207 final Path absF = fixRelativePart(f);
1208 return new FSLinkResolver<BlockLocation[]>() {
1209 public BlockLocation[] next(final AbstractFileSystem fs, final Path p)
1210 throws IOException, UnresolvedLinkException {
1211 return fs.getFileBlockLocations(p, start, len);
1212 }
1213 }.resolve(this, absF);
1214 }
1215
1216 /**
 * Returns a status object describing the use and capacity of the
 * file system denoted by the Path argument f.
1219 * If the file system has multiple partitions, the
1220 * use and capacity of the partition pointed to by the specified
1221 * path is reflected.
1222 *
1223 * @param f Path for which status should be obtained. null means the
1224 * root partition of the default file system.
1225 *
1226 * @return a FsStatus object
1227 *
1228 * @throws AccessControlException If access is denied
1229 * @throws FileNotFoundException If <code>f</code> does not exist
1230 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1231 * not supported
1232 * @throws IOException If an I/O error occurred
1233 *
1234 * Exceptions applicable to file systems accessed over RPC:
1235 * @throws RpcClientException If an exception occurred in the RPC client
1236 * @throws RpcServerException If an exception occurred in the RPC server
1237 * @throws UnexpectedServerException If server implementation throws
1238 * undeclared exception to RPC server
1239 */
1240 public FsStatus getFsStatus(final Path f) throws AccessControlException,
1241 FileNotFoundException, UnsupportedFileSystemException, IOException {
1242 if (f == null) {
1243 return defaultFS.getFsStatus();
1244 }
1245 final Path absF = fixRelativePart(f);
1246 return new FSLinkResolver<FsStatus>() {
1247 public FsStatus next(final AbstractFileSystem fs, final Path p)
1248 throws IOException, UnresolvedLinkException {
1249 return fs.getFsStatus(p);
1250 }
1251 }.resolve(this, absF);
1252 }
1253
1254 /**
 * Creates a symbolic link to an existing file. An exception is thrown if
 * the symlink already exists, the user does not have permission to create
 * the symlink, or the underlying file system does not support symlinks.
1258 *
1259 * Symlink permissions are ignored, access to a symlink is determined by
1260 * the permissions of the symlink target.
1261 *
1262 * Symlinks in paths leading up to the final path component are resolved
1263 * transparently. If the final path component refers to a symlink some
1264 * functions operate on the symlink itself, these are:
1265 * - delete(f) and deleteOnExit(f) - Deletes the symlink.
1266 * - rename(src, dst) - If src refers to a symlink, the symlink is
1267 * renamed. If dst refers to a symlink, the symlink is over-written.
1268 * - getLinkTarget(f) - Returns the target of the symlink.
1269 * - getFileLinkStatus(f) - Returns a FileStatus object describing
1270 * the symlink.
1271 * Some functions, create() and mkdir(), expect the final path component
1272 * does not exist. If they are given a path that refers to a symlink that
1273 * does exist they behave as if the path referred to an existing file or
1274 * directory. All other functions fully resolve, ie follow, the symlink.
1275 * These are: open, setReplication, setOwner, setTimes, setWorkingDirectory,
1276 * setPermission, getFileChecksum, setVerifyChecksum, getFileBlockLocations,
1277 * getFsStatus, getFileStatus, exists, and listStatus.
1278 *
1279 * Symlink targets are stored as given to createSymlink, assuming the
1280 * underlying file system is capable of storing a fully qualified URI.
1281 * Dangling symlinks are permitted. FileContext supports four types of
1282 * symlink targets, and resolves them as follows
1283 * <pre>
1284 * Given a path referring to a symlink of form:
1285 *
1286 * <---X--->
1287 * fs://host/A/B/link
1288 * <-----Y----->
1289 *
1290 * In this path X is the scheme and authority that identify the file system,
1291 * and Y is the path leading up to the final path component "link". If Y is
1292 * a symlink itself then let Y' be the target of Y and X' be the scheme and
 * authority of Y'. Symlink targets may be:
1294 *
1295 * 1. Fully qualified URIs
1296 *
1297 * fs://hostX/A/B/file Resolved according to the target file system.
1298 *
1299 * 2. Partially qualified URIs (eg scheme but no host)
1300 *
 *   fs:///A/B/file  Resolved according to the target file system. Eg resolving
1302 * a symlink to hdfs:///A results in an exception because
1303 * HDFS URIs must be fully qualified, while a symlink to
1304 * file:///A will not since Hadoop's local file systems
1305 * require partially qualified URIs.
1306 *
1307 * 3. Relative paths
1308 *
1309 * path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
1310 * is "../B/file" then [Y'][path] is hdfs://host/B/file
1311 *
1312 * 4. Absolute paths
1313 *
 *   path  Resolves to [X'][path]. Eg if Y resolves to hdfs://host/A/B and path
 *         is "/file" then [X'][path] is hdfs://host/file
1316 * </pre>
1317 *
1318 * @param target the target of the symbolic link
1319 * @param link the path to be created that points to target
1320 * @param createParent if true then missing parent dirs are created if
1321 * false then parent must exist
1322 *
1323 *
1324 * @throws AccessControlException If access is denied
 * @throws FileAlreadyExistsException If file <code>link</code> already exists
1326 * @throws FileNotFoundException If <code>target</code> does not exist
1327 * @throws ParentNotDirectoryException If parent of <code>link</code> is not a
1328 * directory.
1329 * @throws UnsupportedFileSystemException If file system for
1330 * <code>target</code> or <code>link</code> is not supported
1331 * @throws IOException If an I/O error occurred
1332 */
1333 public void createSymlink(final Path target, final Path link,
1334 final boolean createParent) throws AccessControlException,
1335 FileAlreadyExistsException, FileNotFoundException,
1336 ParentNotDirectoryException, UnsupportedFileSystemException,
1337 IOException {
1338 final Path nonRelLink = fixRelativePart(link);
1339 new FSLinkResolver<Void>() {
1340 public Void next(final AbstractFileSystem fs, final Path p)
1341 throws IOException, UnresolvedLinkException {
1342 fs.createSymlink(target, p, createParent);
1343 return null;
1344 }
1345 }.resolve(this, nonRelLink);
1346 }
1347
1348 /**
1349 * List the statuses of the files/directories in the given path if the path is
1350 * a directory.
1351 *
1352 * @param f is the path
1353 *
1354 * @return an iterator that traverses statuses of the files/directories
1355 * in the given path
1356 *
1357 * @throws AccessControlException If access is denied
1358 * @throws FileNotFoundException If <code>f</code> does not exist
1359 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1360 * not supported
1361 * @throws IOException If an I/O error occurred
1362 *
1363 * Exceptions applicable to file systems accessed over RPC:
1364 * @throws RpcClientException If an exception occurred in the RPC client
1365 * @throws RpcServerException If an exception occurred in the RPC server
1366 * @throws UnexpectedServerException If server implementation throws
1367 * undeclared exception to RPC server
1368 */
1369 public RemoteIterator<FileStatus> listStatus(final Path f) throws
1370 AccessControlException, FileNotFoundException,
1371 UnsupportedFileSystemException, IOException {
1372 final Path absF = fixRelativePart(f);
1373 return new FSLinkResolver<RemoteIterator<FileStatus>>() {
1374 public RemoteIterator<FileStatus> next(
1375 final AbstractFileSystem fs, final Path p)
1376 throws IOException, UnresolvedLinkException {
1377 return fs.listStatusIterator(p);
1378 }
1379 }.resolve(this, absF);
1380 }
1381
1382 /**
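 * List the files under the given path that have corrupt blocks.
 *
 * @param path the path to search under
 * @return an iterator over the corrupt files under the given path
 * (may contain duplicates if a file has more than one corrupt block)
 * @throws IOException If an I/O error occurred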
1386 */
1387 public RemoteIterator<Path> listCorruptFileBlocks(Path path)
1388 throws IOException {
1389 final Path absF = fixRelativePart(path);
1390 return new FSLinkResolver<RemoteIterator<Path>>() {
1391 @Override
1392 public RemoteIterator<Path> next(final AbstractFileSystem fs,
1393 final Path p)
1394 throws IOException, UnresolvedLinkException {
1395 return fs.listCorruptFileBlocks(p);
1396 }
1397 }.resolve(this, absF);
1398 }
1399
1400 /**
1401 * List the statuses of the files/directories in the given path if the path is
1402 * a directory.
 * Return the file's status and block locations if the path is a file.
1404 *
1405 * If a returned status is a file, it contains the file's block locations.
1406 *
1407 * @param f is the path
1408 *
1409 * @return an iterator that traverses statuses of the files/directories
1410 * in the given path
1411 * If any IO exception (for example the input directory gets deleted while
1412 * listing is being executed), next() or hasNext() of the returned iterator
1413 * may throw a RuntimeException with the io exception as the cause.
1414 *
1415 * @throws AccessControlException If access is denied
1416 * @throws FileNotFoundException If <code>f</code> does not exist
1417 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1418 * not supported
1419 * @throws IOException If an I/O error occurred
1420 *
1421 * Exceptions applicable to file systems accessed over RPC:
1422 * @throws RpcClientException If an exception occurred in the RPC client
1423 * @throws RpcServerException If an exception occurred in the RPC server
1424 * @throws UnexpectedServerException If server implementation throws
1425 * undeclared exception to RPC server
1426 */
1427 public RemoteIterator<LocatedFileStatus> listLocatedStatus(
1428 final Path f) throws
1429 AccessControlException, FileNotFoundException,
1430 UnsupportedFileSystemException, IOException {
1431 final Path absF = fixRelativePart(f);
1432 return new FSLinkResolver<RemoteIterator<LocatedFileStatus>>() {
1433 public RemoteIterator<LocatedFileStatus> next(
1434 final AbstractFileSystem fs, final Path p)
1435 throws IOException, UnresolvedLinkException {
1436 return fs.listLocatedStatus(p);
1437 }
1438 }.resolve(this, absF);
1439 }
1440
1441 /**
1442 * Mark a path to be deleted on JVM shutdown.
1443 *
1444 * @param f the existing path to delete.
1445 *
1446 * @return true if deleteOnExit is successful, otherwise false.
1447 *
1448 * @throws AccessControlException If access is denied
1449 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1450 * not supported
1451 * @throws IOException If an I/O error occurred
1452 *
1453 * Exceptions applicable to file systems accessed over RPC:
1454 * @throws RpcClientException If an exception occurred in the RPC client
1455 * @throws RpcServerException If an exception occurred in the RPC server
1456 * @throws UnexpectedServerException If server implementation throws
1457 * undeclared exception to RPC server
1458 */
1459 public boolean deleteOnExit(Path f) throws AccessControlException,
1460 IOException {
1461 if (!this.util().exists(f)) {
1462 return false;
1463 }
1464 synchronized (DELETE_ON_EXIT) {
1465 if (DELETE_ON_EXIT.isEmpty()) {
1466 ShutdownHookManager.get().addShutdownHook(FINALIZER, SHUTDOWN_HOOK_PRIORITY);
1467 }
1468
1469 Set<Path> set = DELETE_ON_EXIT.get(this);
1470 if (set == null) {
1471 set = new TreeSet<Path>();
1472 DELETE_ON_EXIT.put(this, set);
1473 }
1474 set.add(f);
1475 }
1476 return true;
1477 }
1478
/** The {@link Util} instance handed out by {@link #util()}. */
private final Util util;

/**
 * Accessor for the utility/library methods of this FileContext.
 *
 * @return the {@link Util} instance held in the <code>util</code> field
 */
public Util util() {
  return util;
}
1483
1484
1485 /**
 * Utility/library methods built over the basic FileContext methods.
 * Since these are library functions, the operations are not atomic
 * and some of them may partially complete if other threads are making
 * changes to the same part of the name space.
1490 */
1491 public class Util {
1492 /**
1493 * Does the file exist?
1494 * Note: Avoid using this method if you already have FileStatus in hand.
1495 * Instead reuse the FileStatus
1496 * @param f the file or dir to be checked
1497 *
1498 * @throws AccessControlException If access is denied
1499 * @throws IOException If an I/O error occurred
1500 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1501 * not supported
1502 *
1503 * Exceptions applicable to file systems accessed over RPC:
1504 * @throws RpcClientException If an exception occurred in the RPC client
1505 * @throws RpcServerException If an exception occurred in the RPC server
1506 * @throws UnexpectedServerException If server implementation throws
1507 * undeclared exception to RPC server
1508 */
1509 public boolean exists(final Path f) throws AccessControlException,
1510 UnsupportedFileSystemException, IOException {
1511 try {
1512 FileStatus fs = FileContext.this.getFileStatus(f);
1513 assert fs != null;
1514 return true;
1515 } catch (FileNotFoundException e) {
1516 return false;
1517 }
1518 }
1519
1520 /**
1521 * Return a list of file status objects that corresponds to supplied paths
1522 * excluding those non-existent paths.
1523 *
1524 * @param paths list of paths we want information from
1525 *
1526 * @return a list of FileStatus objects
1527 *
1528 * @throws AccessControlException If access is denied
1529 * @throws IOException If an I/O error occurred
1530 *
1531 * Exceptions applicable to file systems accessed over RPC:
1532 * @throws RpcClientException If an exception occurred in the RPC client
1533 * @throws RpcServerException If an exception occurred in the RPC server
1534 * @throws UnexpectedServerException If server implementation throws
1535 * undeclared exception to RPC server
1536 */
1537 private FileStatus[] getFileStatus(Path[] paths)
1538 throws AccessControlException, IOException {
1539 if (paths == null) {
1540 return null;
1541 }
1542 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length);
1543 for (int i = 0; i < paths.length; i++) {
1544 try {
1545 results.add(FileContext.this.getFileStatus(paths[i]));
1546 } catch (FileNotFoundException fnfe) {
1547 // ignoring
1548 }
1549 }
1550 return results.toArray(new FileStatus[results.size()]);
1551 }
1552
1553
1554 /**
1555 * Return the {@link ContentSummary} of path f.
1556 * @param f path
1557 *
1558 * @return the {@link ContentSummary} of path f.
1559 *
1560 * @throws AccessControlException If access is denied
1561 * @throws FileNotFoundException If <code>f</code> does not exist
1562 * @throws UnsupportedFileSystemException If file system for
1563 * <code>f</code> is not supported
1564 * @throws IOException If an I/O error occurred
1565 *
1566 * Exceptions applicable to file systems accessed over RPC:
1567 * @throws RpcClientException If an exception occurred in the RPC client
1568 * @throws RpcServerException If an exception occurred in the RPC server
1569 * @throws UnexpectedServerException If server implementation throws
1570 * undeclared exception to RPC server
1571 */
1572 public ContentSummary getContentSummary(Path f)
1573 throws AccessControlException, FileNotFoundException,
1574 UnsupportedFileSystemException, IOException {
1575 FileStatus status = FileContext.this.getFileStatus(f);
1576 if (status.isFile()) {
1577 return new ContentSummary(status.getLen(), 1, 0);
1578 }
1579 long[] summary = {0, 0, 1};
1580 RemoteIterator<FileStatus> statusIterator =
1581 FileContext.this.listStatus(f);
1582 while(statusIterator.hasNext()) {
1583 FileStatus s = statusIterator.next();
1584 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) :
1585 new ContentSummary(s.getLen(), 1, 0);
1586 summary[0] += c.getLength();
1587 summary[1] += c.getFileCount();
1588 summary[2] += c.getDirectoryCount();
1589 }
1590 return new ContentSummary(summary[0], summary[1], summary[2]);
1591 }
1592
1593 /**
1594 * See {@link #listStatus(Path[], PathFilter)}
1595 */
1596 public FileStatus[] listStatus(Path[] files) throws AccessControlException,
1597 FileNotFoundException, IOException {
1598 return listStatus(files, DEFAULT_FILTER);
1599 }
1600
1601 /**
1602 * Filter files/directories in the given path using the user-supplied path
1603 * filter.
1604 *
1605 * @param f is the path name
1606 * @param filter is the user-supplied path filter
1607 *
1608 * @return an array of FileStatus objects for the files under the given path
1609 * after applying the filter
1610 *
1611 * @throws AccessControlException If access is denied
1612 * @throws FileNotFoundException If <code>f</code> does not exist
1613 * @throws UnsupportedFileSystemException If file system for
1614 * <code>pathPattern</code> is not supported
1615 * @throws IOException If an I/O error occurred
1616 *
1617 * Exceptions applicable to file systems accessed over RPC:
1618 * @throws RpcClientException If an exception occurred in the RPC client
1619 * @throws RpcServerException If an exception occurred in the RPC server
1620 * @throws UnexpectedServerException If server implementation throws
1621 * undeclared exception to RPC server
1622 */
1623 public FileStatus[] listStatus(Path f, PathFilter filter)
1624 throws AccessControlException, FileNotFoundException,
1625 UnsupportedFileSystemException, IOException {
1626 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1627 listStatus(results, f, filter);
1628 return results.toArray(new FileStatus[results.size()]);
1629 }
1630
1631 /**
1632 * Filter files/directories in the given list of paths using user-supplied
1633 * path filter.
1634 *
1635 * @param files is a list of paths
1636 * @param filter is the filter
1637 *
1638 * @return a list of statuses for the files under the given paths after
1639 * applying the filter
1640 *
1641 * @throws AccessControlException If access is denied
1642 * @throws FileNotFoundException If a file in <code>files</code> does not
1643 * exist
1644 * @throws IOException If an I/O error occurred
1645 *
1646 * Exceptions applicable to file systems accessed over RPC:
1647 * @throws RpcClientException If an exception occurred in the RPC client
1648 * @throws RpcServerException If an exception occurred in the RPC server
1649 * @throws UnexpectedServerException If server implementation throws
1650 * undeclared exception to RPC server
1651 */
1652 public FileStatus[] listStatus(Path[] files, PathFilter filter)
1653 throws AccessControlException, FileNotFoundException, IOException {
1654 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1655 for (int i = 0; i < files.length; i++) {
1656 listStatus(results, files[i], filter);
1657 }
1658 return results.toArray(new FileStatus[results.size()]);
1659 }
1660
1661 /*
1662 * Filter files/directories in the given path using the user-supplied path
1663 * filter. Results are added to the given array <code>results</code>.
1664 */
1665 private void listStatus(ArrayList<FileStatus> results, Path f,
1666 PathFilter filter) throws AccessControlException,
1667 FileNotFoundException, IOException {
1668 FileStatus[] listing = listStatus(f);
1669 if (listing != null) {
1670 for (int i = 0; i < listing.length; i++) {
1671 if (filter.accept(listing[i].getPath())) {
1672 results.add(listing[i]);
1673 }
1674 }
1675 }
1676 }
1677
1678 /**
1679 * List the statuses of the files/directories in the given path
1680 * if the path is a directory.
1681 *
1682 * @param f is the path
1683 *
1684 * @return an array that contains statuses of the files/directories
1685 * in the given path
1686 *
1687 * @throws AccessControlException If access is denied
1688 * @throws FileNotFoundException If <code>f</code> does not exist
1689 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1690 * not supported
1691 * @throws IOException If an I/O error occurred
1692 *
1693 * Exceptions applicable to file systems accessed over RPC:
1694 * @throws RpcClientException If an exception occurred in the RPC client
1695 * @throws RpcServerException If an exception occurred in the RPC server
1696 * @throws UnexpectedServerException If server implementation throws
1697 * undeclared exception to RPC server
1698 */
1699 public FileStatus[] listStatus(final Path f) throws AccessControlException,
1700 FileNotFoundException, UnsupportedFileSystemException,
1701 IOException {
1702 final Path absF = fixRelativePart(f);
1703 return new FSLinkResolver<FileStatus[]>() {
1704 public FileStatus[] next(final AbstractFileSystem fs, final Path p)
1705 throws IOException, UnresolvedLinkException {
1706 return fs.listStatus(p);
1707 }
1708 }.resolve(FileContext.this, absF);
1709 }
1710
1711 /**
1712 * List the statuses and block locations of the files in the given path.
1713 *
1714 * If the path is a directory,
1715 * if recursive is false, returns files in the directory;
1716 * if recursive is true, return files in the subtree rooted at the path.
1717 * The subtree is traversed in the depth-first order.
1718 * If the path is a file, return the file's status and block locations.
1719 * Files across symbolic links are also returned.
1720 *
1721 * @param f is the path
1722 * @param recursive if the subdirectories need to be traversed recursively
1723 *
1724 * @return an iterator that traverses statuses of the files
1725 * If any IO exception (for example a sub-directory gets deleted while
1726 * listing is being executed), next() or hasNext() of the returned iterator
1727 * may throw a RuntimeException with the IO exception as the cause.
1728 *
1729 * @throws AccessControlException If access is denied
1730 * @throws FileNotFoundException If <code>f</code> does not exist
1731 * @throws UnsupportedFileSystemException If file system for <code>f</code>
1732 * is not supported
1733 * @throws IOException If an I/O error occurred
1734 *
1735 * Exceptions applicable to file systems accessed over RPC:
1736 * @throws RpcClientException If an exception occurred in the RPC client
1737 * @throws RpcServerException If an exception occurred in the RPC server
1738 * @throws UnexpectedServerException If server implementation throws
1739 * undeclared exception to RPC server
1740 */
1741 public RemoteIterator<LocatedFileStatus> listFiles(
1742 final Path f, final boolean recursive) throws AccessControlException,
1743 FileNotFoundException, UnsupportedFileSystemException,
1744 IOException {
1745 return new RemoteIterator<LocatedFileStatus>() {
1746 private Stack<RemoteIterator<LocatedFileStatus>> itors =
1747 new Stack<RemoteIterator<LocatedFileStatus>>();
1748 RemoteIterator<LocatedFileStatus> curItor = listLocatedStatus(f);
1749 LocatedFileStatus curFile;
1750
1751 /**
1752 * Returns <tt>true</tt> if the iterator has more files.
1753 *
1754 * @return <tt>true</tt> if the iterator has more files.
1755 * @throws AccessControlException if not allowed to access next
1756 * file's status or locations
1757 * @throws FileNotFoundException if next file does not exist any more
1758 * @throws UnsupportedFileSystemException if next file's
1759 * fs is unsupported
1760 * @throws IOException for all other IO errors
1761 * for example, NameNode is not avaialbe or
1762 * NameNode throws IOException due to an error
1763 * while getting the status or block locations
1764 */
1765 @Override
1766 public boolean hasNext() throws IOException {
1767 while (curFile == null) {
1768 if (curItor.hasNext()) {
1769 handleFileStat(curItor.next());
1770 } else if (!itors.empty()) {
1771 curItor = itors.pop();
1772 } else {
1773 return false;
1774 }
1775 }
1776 return true;
1777 }
1778
1779 /**
1780 * Process the input stat.
1781 * If it is a file, return the file stat.
1782 * If it is a directory, traverse the directory if recursive is true;
1783 * ignore it if recursive is false.
1784 * If it is a symlink, resolve the symlink first and then process it
1785 * depending on if it is a file or directory.
1786 * @param stat input status
1787 * @throws AccessControlException if access is denied
1788 * @throws FileNotFoundException if file is not found
1789 * @throws UnsupportedFileSystemException if fs is not supported
1790 * @throws IOException for all other IO errors
1791 */
1792 private void handleFileStat(LocatedFileStatus stat)
1793 throws IOException {
1794 if (stat.isFile()) { // file
1795 curFile = stat;
1796 } else if (stat.isSymlink()) { // symbolic link
1797 // resolve symbolic link
1798 FileStatus symstat = FileContext.this.getFileStatus(
1799 stat.getSymlink());
1800 if (symstat.isFile() || (recursive && symstat.isDirectory())) {
1801 itors.push(curItor);
1802 curItor = listLocatedStatus(stat.getPath());
1803 }
1804 } else if (recursive) { // directory
1805 itors.push(curItor);
1806 curItor = listLocatedStatus(stat.getPath());
1807 }
1808 }
1809
1810 /**
1811 * Returns the next file's status with its block locations
1812 *
1813 * @throws AccessControlException if not allowed to access next
1814 * file's status or locations
1815 * @throws FileNotFoundException if next file does not exist any more
1816 * @throws UnsupportedFileSystemException if next file's
1817 * fs is unsupported
1818 * @throws IOException for all other IO errors
1819 * for example, NameNode is not avaialbe or
1820 * NameNode throws IOException due to an error
1821 * while getting the status or block locations
1822 */
1823 @Override
1824 public LocatedFileStatus next() throws IOException {
1825 if (hasNext()) {
1826 LocatedFileStatus result = curFile;
1827 curFile = null;
1828 return result;
1829 }
1830 throw new java.util.NoSuchElementException("No more entry in " + f);
1831 }
1832 };
1833 }
1834
1835 /**
1836 * <p>Return all the files that match filePattern and are not checksum
1837 * files. Results are sorted by their names.
1838 *
1839 * <p>
1840 * A filename pattern is composed of <i>regular</i> characters and
1841 * <i>special pattern matching</i> characters, which are:
1842 *
1843 * <dl>
1844 * <dd>
1845 * <dl>
1846 * <p>
1847 * <dt> <tt> ? </tt>
1848 * <dd> Matches any single character.
1849 *
1850 * <p>
1851 * <dt> <tt> * </tt>
1852 * <dd> Matches zero or more characters.
1853 *
1854 * <p>
1855 * <dt> <tt> [<i>abc</i>] </tt>
1856 * <dd> Matches a single character from character set
1857 * <tt>{<i>a,b,c</i>}</tt>.
1858 *
1859 * <p>
1860 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt>
1861 * <dd> Matches a single character from the character range
1862 * <tt>{<i>a...b</i>}</tt>. Note: character <tt><i>a</i></tt> must be
1863 * lexicographically less than or equal to character <tt><i>b</i></tt>.
1864 *
1865 * <p>
1866 * <dt> <tt> [^<i>a</i>] </tt>
1867 * <dd> Matches a single char that is not from character set or range
1868 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur
1869 * immediately to the right of the opening bracket.
1870 *
1871 * <p>
1872 * <dt> <tt> \<i>c</i> </tt>
1873 * <dd> Removes (escapes) any special meaning of character <i>c</i>.
1874 *
1875 * <p>
1876 * <dt> <tt> {ab,cd} </tt>
1877 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt>
1878 *
1879 * <p>
1880 * <dt> <tt> {ab,c{de,fh}} </tt>
1881 * <dd> Matches a string from string set <tt>{<i>ab, cde, cfh</i>}</tt>
1882 *
1883 * </dl>
1884 * </dd>
1885 * </dl>
1886 *
1887 * @param pathPattern a regular expression specifying a pth pattern
1888 *
1889 * @return an array of paths that match the path pattern
1890 *
1891 * @throws AccessControlException If access is denied
1892 * @throws UnsupportedFileSystemException If file system for
1893 * <code>pathPattern</code> is not supported
1894 * @throws IOException If an I/O error occurred
1895 *
1896 * Exceptions applicable to file systems accessed over RPC:
1897 * @throws RpcClientException If an exception occurred in the RPC client
1898 * @throws RpcServerException If an exception occurred in the RPC server
1899 * @throws UnexpectedServerException If server implementation throws
1900 * undeclared exception to RPC server
1901 */
1902 public FileStatus[] globStatus(Path pathPattern)
1903 throws AccessControlException, UnsupportedFileSystemException,
1904 IOException {
1905 return globStatus(pathPattern, DEFAULT_FILTER);
1906 }
1907
1908 /**
1909 * Return an array of FileStatus objects whose path names match pathPattern
1910 * and is accepted by the user-supplied path filter. Results are sorted by
1911 * their path names.
1912 * Return null if pathPattern has no glob and the path does not exist.
1913 * Return an empty array if pathPattern has a glob and no path matches it.
1914 *
1915 * @param pathPattern regular expression specifying the path pattern
1916 * @param filter user-supplied path filter
1917 *
1918 * @return an array of FileStatus objects
1919 *
1920 * @throws AccessControlException If access is denied
1921 * @throws UnsupportedFileSystemException If file system for
1922 * <code>pathPattern</code> is not supported
1923 * @throws IOException If an I/O error occurred
1924 *
1925 * Exceptions applicable to file systems accessed over RPC:
1926 * @throws RpcClientException If an exception occurred in the RPC client
1927 * @throws RpcServerException If an exception occurred in the RPC server
1928 * @throws UnexpectedServerException If server implementation throws
1929 * undeclared exception to RPC server
1930 */
1931 public FileStatus[] globStatus(final Path pathPattern,
1932 final PathFilter filter) throws AccessControlException,
1933 UnsupportedFileSystemException, IOException {
1934 URI uri = getFSofPath(fixRelativePart(pathPattern)).getUri();
1935
1936 String filename = pathPattern.toUri().getPath();
1937
1938 List<String> filePatterns = GlobExpander.expand(filename);
1939 if (filePatterns.size() == 1) {
1940 Path absPathPattern = fixRelativePart(pathPattern);
1941 return globStatusInternal(uri, new Path(absPathPattern.toUri()
1942 .getPath()), filter);
1943 } else {
1944 List<FileStatus> results = new ArrayList<FileStatus>();
1945 for (String iFilePattern : filePatterns) {
1946 Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern));
1947 FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter);
1948 for (FileStatus file : files) {
1949 results.add(file);
1950 }
1951 }
1952 return results.toArray(new FileStatus[results.size()]);
1953 }
1954 }
1955
1956 /**
1957 *
1958 * @param uri for all the inPathPattern
1959 * @param inPathPattern - without the scheme & authority (take from uri)
1960 * @param filter
1961 *
1962 * @return an array of FileStatus objects
1963 *
1964 * @throws AccessControlException If access is denied
1965 * @throws IOException If an I/O error occurred
1966 */
1967 private FileStatus[] globStatusInternal(final URI uri,
1968 final Path inPathPattern, final PathFilter filter)
1969 throws AccessControlException, IOException
1970 {
1971 Path[] parents = new Path[1];
1972 int level = 0;
1973
1974 assert(inPathPattern.toUri().getScheme() == null &&
1975 inPathPattern.toUri().getAuthority() == null &&
1976 inPathPattern.isUriPathAbsolute());
1977
1978
1979 String filename = inPathPattern.toUri().getPath();
1980
1981 // path has only zero component
1982 if ("".equals(filename) || Path.SEPARATOR.equals(filename)) {
1983 Path p = inPathPattern.makeQualified(uri, null);
1984 return getFileStatus(new Path[]{p});
1985 }
1986
1987 // path has at least one component
1988 String[] components = filename.split(Path.SEPARATOR);
1989
1990 // Path is absolute, first component is "/" hence first component
1991 // is the uri root
1992 parents[0] = new Path(new Path(uri), new Path("/"));
1993 level = 1;
1994
1995 // glob the paths that match the parent path, ie. [0, components.length-1]
1996 boolean[] hasGlob = new boolean[]{false};
1997 Path[] relParentPaths =
1998 globPathsLevel(parents, components, level, hasGlob);
1999 FileStatus[] results;
2000
2001 if (relParentPaths == null || relParentPaths.length == 0) {
2002 results = null;
2003 } else {
2004 // fix the pathes to be abs
2005 Path[] parentPaths = new Path [relParentPaths.length];
2006 for(int i=0; i<relParentPaths.length; i++) {
2007 parentPaths[i] = relParentPaths[i].makeQualified(uri, null);
2008 }
2009
2010 // Now work on the last component of the path
2011 GlobFilter fp =
2012 new GlobFilter(components[components.length - 1], filter);
2013 if (fp.hasPattern()) { // last component has a pattern
2014 // list parent directories and then glob the results
2015 results = listStatus(parentPaths, fp);
2016 hasGlob[0] = true;
2017 } else { // last component does not have a pattern
2018 // get all the path names
2019 ArrayList<Path> filteredPaths =
2020 new ArrayList<Path>(parentPaths.length);
2021 for (int i = 0; i < parentPaths.length; i++) {
2022 parentPaths[i] = new Path(parentPaths[i],
2023 components[components.length - 1]);
2024 if (fp.accept(parentPaths[i])) {
2025 filteredPaths.add(parentPaths[i]);
2026 }
2027 }
2028 // get all their statuses
2029 results = getFileStatus(
2030 filteredPaths.toArray(new Path[filteredPaths.size()]));
2031 }
2032 }
2033
2034 // Decide if the pathPattern contains a glob or not
2035 if (results == null) {
2036 if (hasGlob[0]) {
2037 results = new FileStatus[0];
2038 }
2039 } else {
2040 if (results.length == 0) {
2041 if (!hasGlob[0]) {
2042 results = null;
2043 }
2044 } else {
2045 Arrays.sort(results);
2046 }
2047 }
2048 return results;
2049 }
2050
2051 /*
2052 * For a path of N components, return a list of paths that match the
2053 * components [<code>level</code>, <code>N-1</code>].
2054 */
2055 private Path[] globPathsLevel(Path[] parents, String[] filePattern,
2056 int level, boolean[] hasGlob) throws AccessControlException,
2057 FileNotFoundException, IOException {
2058 if (level == filePattern.length - 1) {
2059 return parents;
2060 }
2061 if (parents == null || parents.length == 0) {
2062 return null;
2063 }
2064 GlobFilter fp = new GlobFilter(filePattern[level]);
2065 if (fp.hasPattern()) {
2066 parents = FileUtil.stat2Paths(listStatus(parents, fp));
2067 hasGlob[0] = true;
2068 } else {
2069 for (int i = 0; i < parents.length; i++) {
2070 parents[i] = new Path(parents[i], filePattern[level]);
2071 }
2072 }
2073 return globPathsLevel(parents, filePattern, level + 1, hasGlob);
2074 }
2075
2076 /**
2077 * Copy file from src to dest. See
2078 * {@link #copy(Path, Path, boolean, boolean)}
2079 */
2080 public boolean copy(final Path src, final Path dst)
2081 throws AccessControlException, FileAlreadyExistsException,
2082 FileNotFoundException, ParentNotDirectoryException,
2083 UnsupportedFileSystemException, IOException {
2084 return copy(src, dst, false, false);
2085 }
2086
2087 /**
2088 * Copy from src to dst, optionally deleting src and overwriting dst.
2089 * @param src
2090 * @param dst
2091 * @param deleteSource - delete src if true
2092 * @param overwrite overwrite dst if true; throw IOException if dst exists
2093 * and overwrite is false.
2094 *
2095 * @return true if copy is successful
2096 *
2097 * @throws AccessControlException If access is denied
2098 * @throws FileAlreadyExistsException If <code>dst</code> already exists
2099 * @throws FileNotFoundException If <code>src</code> does not exist
2100 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not
2101 * a directory
2102 * @throws UnsupportedFileSystemException If file system for
2103 * <code>src</code> or <code>dst</code> is not supported
2104 * @throws IOException If an I/O error occurred
2105 *
2106 * Exceptions applicable to file systems accessed over RPC:
2107 * @throws RpcClientException If an exception occurred in the RPC client
2108 * @throws RpcServerException If an exception occurred in the RPC server
2109 * @throws UnexpectedServerException If server implementation throws
2110 * undeclared exception to RPC server
2111 *
2112 * RuntimeExceptions:
2113 * @throws InvalidPathException If path <code>dst</code> is invalid
2114 */
2115 public boolean copy(final Path src, final Path dst, boolean deleteSource,
2116 boolean overwrite) throws AccessControlException,
2117 FileAlreadyExistsException, FileNotFoundException,
2118 ParentNotDirectoryException, UnsupportedFileSystemException,
2119 IOException {
2120 checkNotSchemeWithRelative(src);
2121 checkNotSchemeWithRelative(dst);
2122 Path qSrc = makeQualified(src);
2123 Path qDst = makeQualified(dst);
2124 checkDest(qSrc.getName(), qDst, overwrite);
2125 FileStatus fs = FileContext.this.getFileStatus(qSrc);
2126 if (fs.isDirectory()) {
2127 checkDependencies(qSrc, qDst);
2128 mkdir(qDst, FsPermission.getDefault(), true);
2129 FileStatus[] contents = listStatus(qSrc);
2130 for (FileStatus content : contents) {
2131 copy(makeQualified(content.getPath()), makeQualified(new Path(qDst,
2132 content.getPath().getName())), deleteSource, overwrite);
2133 }
2134 } else {
2135 InputStream in=null;
2136 OutputStream out = null;
2137 try {
2138 in = open(qSrc);
2139 EnumSet<CreateFlag> createFlag = overwrite ? EnumSet.of(
2140 CreateFlag.CREATE, CreateFlag.OVERWRITE) :
2141 EnumSet.of(CreateFlag.CREATE);
2142 out = create(qDst, createFlag);
2143 IOUtils.copyBytes(in, out, conf, true);
2144 } catch (IOException e) {
2145 IOUtils.closeStream(out);
2146 IOUtils.closeStream(in);
2147 throw e;
2148 }
2149 }
2150 if (deleteSource) {
2151 return delete(qSrc, true);
2152 } else {
2153 return true;
2154 }
2155 }
2156 }
2157
2158 /**
2159 * Check if copying srcName to dst would overwrite an existing
2160 * file or directory.
2161 * @param srcName File or directory to be copied.
2162 * @param dst Destination to copy srcName to.
2163 * @param overwrite Whether it's ok to overwrite an existing file.
2164 * @throws AccessControlException If access is denied.
2165 * @throws IOException If dst is an existing directory, or dst is an
2166 * existing file and the overwrite option is not passed.
2167 */
2168 private void checkDest(String srcName, Path dst, boolean overwrite)
2169 throws AccessControlException, IOException {
2170 try {
2171 FileStatus dstFs = getFileStatus(dst);
2172 if (dstFs.isDirectory()) {
2173 if (null == srcName) {
2174 throw new IOException("Target " + dst + " is a directory");
2175 }
2176 // Recurse to check if dst/srcName exists.
2177 checkDest(null, new Path(dst, srcName), overwrite);
2178 } else if (!overwrite) {
2179 throw new IOException("Target " + new Path(dst, srcName)
2180 + " already exists");
2181 }
2182 } catch (FileNotFoundException e) {
2183 // dst does not exist - OK to copy.
2184 }
2185 }
2186
2187 //
2188 // If the destination is a subdirectory of the source, then
2189 // generate exception
2190 //
2191 private static void checkDependencies(Path qualSrc, Path qualDst)
2192 throws IOException {
2193 if (isSameFS(qualSrc, qualDst)) {
2194 String srcq = qualSrc.toString() + Path.SEPARATOR;
2195 String dstq = qualDst.toString() + Path.SEPARATOR;
2196 if (dstq.startsWith(srcq)) {
2197 if (srcq.length() == dstq.length()) {
2198 throw new IOException("Cannot copy " + qualSrc + " to itself.");
2199 } else {
2200 throw new IOException("Cannot copy " + qualSrc +
2201 " to its subdirectory " + qualDst);
2202 }
2203 }
2204 }
2205 }
2206
2207 /**
2208 * Are qualSrc and qualDst of the same file system?
2209 * @param qualPath1 - fully qualified path
2210 * @param qualPath2 - fully qualified path
2211 * @return
2212 */
2213 private static boolean isSameFS(Path qualPath1, Path qualPath2) {
2214 URI srcUri = qualPath1.toUri();
2215 URI dstUri = qualPath2.toUri();
2216 return (srcUri.getScheme().equals(dstUri.getScheme()) &&
2217 !(srcUri.getAuthority() != null && dstUri.getAuthority() != null && srcUri
2218 .getAuthority().equals(dstUri.getAuthority())));
2219 }
2220
2221 /**
2222 * Deletes all the paths in deleteOnExit on JVM shutdown.
2223 */
2224 static class FileContextFinalizer implements Runnable {
2225 public synchronized void run() {
2226 processDeleteOnExit();
2227 }
2228 }
2229
2230 /**
2231 * Resolves all symbolic links in the specified path.
2232 * Returns the new path object.
2233 */
2234 protected Path resolve(final Path f) throws FileNotFoundException,
2235 UnresolvedLinkException, AccessControlException, IOException {
2236 return new FSLinkResolver<Path>() {
2237 public Path next(final AbstractFileSystem fs, final Path p)
2238 throws IOException, UnresolvedLinkException {
2239 return fs.resolvePath(p);
2240 }
2241 }.resolve(this, f);
2242 }
2243
2244 /**
2245 * Resolves all symbolic links in the specified path leading up
2246 * to, but not including the final path component.
2247 * @param f path to resolve
2248 * @return the new path object.
2249 */
2250 protected Path resolveIntermediate(final Path f) throws IOException {
2251 return new FSLinkResolver<FileStatus>() {
2252 public FileStatus next(final AbstractFileSystem fs, final Path p)
2253 throws IOException, UnresolvedLinkException {
2254 return fs.getFileLinkStatus(p);
2255 }
2256 }.resolve(this, f).getPath();
2257 }
2258
2259 /**
2260 * Returns the list of AbstractFileSystems accessed in the path. The list may
2261 * contain more than one AbstractFileSystems objects in case of symlinks.
2262 *
2263 * @param f
2264 * Path which needs to be resolved
2265 * @return List of AbstractFileSystems accessed in the path
2266 * @throws IOException
2267 */
2268 Set<AbstractFileSystem> resolveAbstractFileSystems(final Path f)
2269 throws IOException {
2270 final Path absF = fixRelativePart(f);
2271 final HashSet<AbstractFileSystem> result
2272 = new HashSet<AbstractFileSystem>();
2273 new FSLinkResolver<Void>() {
2274 public Void next(final AbstractFileSystem fs, final Path p)
2275 throws IOException, UnresolvedLinkException {
2276 result.add(fs);
2277 fs.getFileStatus(p);
2278 return null;
2279 }
2280 }.resolve(this, absF);
2281 return result;
2282 }
2283
2284 /**
2285 * Class used to perform an operation on and resolve symlinks in a
2286 * path. The operation may potentially span multiple file systems.
2287 */
2288 protected abstract class FSLinkResolver<T> {
2289 // The maximum number of symbolic link components in a path
2290 private static final int MAX_PATH_LINKS = 32;
2291
2292 /**
2293 * Generic helper function overridden on instantiation to perform a
2294 * specific operation on the given file system using the given path
2295 * which may result in an UnresolvedLinkException.
2296 * @param fs AbstractFileSystem to perform the operation on.
2297 * @param p Path given the file system.
2298 * @return Generic type determined by the specific implementation.
2299 * @throws UnresolvedLinkException If symbolic link <code>path</code> could
2300 * not be resolved
2301 * @throws IOException an I/O error occured
2302 */
2303 public abstract T next(final AbstractFileSystem fs, final Path p)
2304 throws IOException, UnresolvedLinkException;
2305
2306 /**
2307 * Performs the operation specified by the next function, calling it
2308 * repeatedly until all symlinks in the given path are resolved.
2309 * @param fc FileContext used to access file systems.
2310 * @param p The path to resolve symlinks in.
2311 * @return Generic type determined by the implementation of next.
2312 * @throws IOException
2313 */
2314 public T resolve(final FileContext fc, Path p) throws IOException {
2315 int count = 0;
2316 T in = null;
2317 Path first = p;
2318 // NB: More than one AbstractFileSystem can match a scheme, eg
2319 // "file" resolves to LocalFs but could have come by RawLocalFs.
2320 AbstractFileSystem fs = fc.getFSofPath(p);
2321
2322 // Loop until all symlinks are resolved or the limit is reached
2323 for (boolean isLink = true; isLink;) {
2324 try {
2325 in = next(fs, p);
2326 isLink = false;
2327 } catch (UnresolvedLinkException e) {
2328 if (count++ > MAX_PATH_LINKS) {
2329 throw new IOException("Possible cyclic loop while " +
2330 "following symbolic link " + first);
2331 }
2332 // Resolve the first unresolved path component
2333 p = qualifySymlinkTarget(fs, p, fs.getLinkTarget(p));
2334 fs = fc.getFSofPath(p);
2335 }
2336 }
2337 return in;
2338 }
2339 }
2340
2341 /**
2342 * Get the statistics for a particular file system
2343 *
2344 * @param uri
2345 * the uri to lookup the statistics. Only scheme and authority part
2346 * of the uri are used as the key to store and lookup.
2347 * @return a statistics object
2348 */
2349 public static Statistics getStatistics(URI uri) {
2350 return AbstractFileSystem.getStatistics(uri);
2351 }
2352
2353 /**
2354 * Clears all the statistics stored in AbstractFileSystem, for all the file
2355 * systems.
2356 */
2357 public static void clearStatistics() {
2358 AbstractFileSystem.clearStatistics();
2359 }
2360
2361 /**
2362 * Prints the statistics to standard output. File System is identified by the
2363 * scheme and authority.
2364 */
2365 public static void printStatistics() {
2366 AbstractFileSystem.printStatistics();
2367 }
2368
2369 /**
2370 * @return Map of uri and statistics for each filesystem instantiated. The uri
2371 * consists of scheme and authority for the filesystem.
2372 */
2373 public static Map<URI, Statistics> getAllStatistics() {
2374 return AbstractFileSystem.getAllStatistics();
2375 }
2376
2377 /**
2378 * Get delegation tokens for the file systems accessed for a given
2379 * path.
2380 * @param p Path for which delegations tokens are requested.
2381 * @param renewer the account name that is allowed to renew the token.
2382 * @return List of delegation tokens.
2383 * @throws IOException
2384 */
2385 @InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" })
2386 public List<Token<?>> getDelegationTokens(
2387 Path p, String renewer) throws IOException {
2388 Set<AbstractFileSystem> afsSet = resolveAbstractFileSystems(p);
2389 List<Token<?>> tokenList =
2390 new ArrayList<Token<?>>();
2391 for (AbstractFileSystem afs : afsSet) {
2392 List<Token<?>> afsTokens = afs.getDelegationTokens(renewer);
2393 tokenList.addAll(afsTokens);
2394 }
2395 return tokenList;
2396 }
2397 }