001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.fs;
019
020 import java.io.FileNotFoundException;
021 import java.io.IOException;
022 import java.io.InputStream;
023 import java.io.OutputStream;
024 import java.net.URI;
025 import java.security.PrivilegedExceptionAction;
026 import java.util.ArrayList;
027 import java.util.Arrays;
028 import java.util.EnumSet;
029 import java.util.HashSet;
030 import java.util.IdentityHashMap;
031 import java.util.List;
032 import java.util.Map;
033 import java.util.Set;
034 import java.util.Stack;
035 import java.util.TreeSet;
036 import java.util.Map.Entry;
037
038 import org.apache.commons.logging.Log;
039 import org.apache.commons.logging.LogFactory;
040 import org.apache.hadoop.HadoopIllegalArgumentException;
041 import org.apache.hadoop.classification.InterfaceAudience;
042 import org.apache.hadoop.classification.InterfaceStability;
043 import org.apache.hadoop.conf.Configuration;
044 import org.apache.hadoop.fs.FileSystem.Statistics;
045 import org.apache.hadoop.fs.Options.CreateOpts;
046 import org.apache.hadoop.fs.permission.FsPermission;
047 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
048 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT;
049 import org.apache.hadoop.io.IOUtils;
050 import org.apache.hadoop.ipc.RpcClientException;
051 import org.apache.hadoop.ipc.RpcServerException;
052 import org.apache.hadoop.ipc.UnexpectedServerException;
053 import org.apache.hadoop.fs.InvalidPathException;
054 import org.apache.hadoop.security.AccessControlException;
055 import org.apache.hadoop.security.UserGroupInformation;
056 import org.apache.hadoop.security.token.Token;
057
058 /**
059 * The FileContext class provides an interface to the application writer for
060 * using the Hadoop file system.
061 * It provides a set of methods for the usual operation: create, open,
062 * list, etc
063 *
064 * <p>
065 * <b> *** Path Names *** </b>
066 * <p>
067 *
068 * The Hadoop file system supports a URI name space and URI names.
069 * It offers a forest of file systems that can be referenced using fully
070 * qualified URIs.
071 * Two common Hadoop file systems implementations are
072 * <ul>
073 * <li> the local file system: file:///path
074 * <li> the hdfs file system hdfs://nnAddress:nnPort/path
075 * </ul>
076 *
077 * While URI names are very flexible, it requires knowing the name or address
078 * of the server. For convenience one often wants to access the default system
079 * in one's environment without knowing its name/address. This has an
080 * additional benefit that it allows one to change one's default fs
081 * (e.g. admin moves application from cluster1 to cluster2).
082 * <p>
083 *
084 * To facilitate this, Hadoop supports a notion of a default file system.
085 * The user can set his default file system, although this is
086 * typically set up for you in your environment via your default config.
087 * A default file system implies a default scheme and authority; slash-relative
088 * names (such as /for/bar) are resolved relative to that default FS.
089 * Similarly a user can also have working-directory-relative names (i.e. names
090 * not starting with a slash). While the working directory is generally in the
091 * same default FS, the wd can be in a different FS.
092 * <p>
093 * Hence Hadoop path names can be one of:
094 * <ul>
095 * <li> fully qualified URI: scheme://authority/path
096 * <li> slash relative names: /path relative to the default file system
097 * <li> wd-relative names: path relative to the working dir
098 * </ul>
099 * Relative paths with scheme (scheme:foo/bar) are illegal.
100 *
101 * <p>
102 * <b>****The Role of the FileContext and configuration defaults****</b>
103 * <p>
104 * The FileContext provides file namespace context for resolving file names;
105 * it also contains the umask for permissions, In that sense it is like the
106 * per-process file-related state in Unix system.
107 * These two properties
108 * <ul>
109 * <li> default file system i.e your slash)
110 * <li> umask
111 * </ul>
112 * in general, are obtained from the default configuration file
113 * in your environment, (@see {@link Configuration}).
114 *
115 * No other configuration parameters are obtained from the default config as
116 * far as the file context layer is concerned. All file system instances
117 * (i.e. deployments of file systems) have default properties; we call these
118 * server side (SS) defaults. Operation like create allow one to select many
119 * properties: either pass them in as explicit parameters or use
120 * the SS properties.
121 * <p>
122 * The file system related SS defaults are
123 * <ul>
124 * <li> the home directory (default is "/user/userName")
125 * <li> the initial wd (only for local fs)
126 * <li> replication factor
127 * <li> block size
128 * <li> buffer size
129 * <li> bytesPerChecksum (if used).
130 * </ul>
131 *
132 * <p>
133 * <b> *** Usage Model for the FileContext class *** </b>
134 * <p>
135 * Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
136 * Unspecified values come from core-defaults.xml in the release jar.
137 * <ul>
138 * <li> myFContext = FileContext.getFileContext(); // uses the default config
139 * // which has your default FS
140 * <li> myFContext.create(path, ...);
141 * <li> myFContext.setWorkingDir(path)
142 * <li> myFContext.open (path, ...);
143 * </ul>
144 * Example 2: Get a FileContext with a specific URI as the default FS
145 * <ul>
146 * <li> myFContext = FileContext.getFileContext(URI)
147 * <li> myFContext.create(path, ...);
148 * ...
149 * </ul>
150 * Example 3: FileContext with local file system as the default
151 * <ul>
152 * <li> myFContext = FileContext.getLocalFSFileContext()
153 * <li> myFContext.create(path, ...);
154 * <li> ...
155 * </ul>
156 * Example 4: Use a specific config, ignoring $HADOOP_CONFIG
157 * Generally you should not need use a config unless you are doing
158 * <ul>
159 * <li> configX = someConfigSomeOnePassedToYou.
160 * <li> myFContext = getFileContext(configX); // configX is not changed,
161 * // is passed down
162 * <li> myFContext.create(path, ...);
163 * <li>...
164 * </ul>
165 *
166 */
167
168 @InterfaceAudience.Public
169 @InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */
170 public final class FileContext {
171
172 public static final Log LOG = LogFactory.getLog(FileContext.class);
173 public static final FsPermission DEFAULT_PERM = FsPermission.getDefault();
174
175 /**
176 * List of files that should be deleted on JVM shutdown.
177 */
178 static final Map<FileContext, Set<Path>> DELETE_ON_EXIT =
179 new IdentityHashMap<FileContext, Set<Path>>();
180
181 /** JVM shutdown hook thread. */
182 static final FileContextFinalizer FINALIZER =
183 new FileContextFinalizer();
184
185 private static final PathFilter DEFAULT_FILTER = new PathFilter() {
186 public boolean accept(final Path file) {
187 return true;
188 }
189 };
190
191 /**
192 * The FileContext is defined by.
193 * 1) defaultFS (slash)
194 * 2) wd
195 * 3) umask
196 */
197 private final AbstractFileSystem defaultFS; //default FS for this FileContext.
198 private Path workingDir; // Fully qualified
199 private FsPermission umask;
200 private final Configuration conf;
201 private final UserGroupInformation ugi;
202
203 private FileContext(final AbstractFileSystem defFs,
204 final FsPermission theUmask, final Configuration aConf) {
205 defaultFS = defFs;
206 umask = FsPermission.getUMask(aConf);
207 conf = aConf;
208 try {
209 ugi = UserGroupInformation.getCurrentUser();
210 } catch (IOException e) {
211 LOG.error("Exception in getCurrentUser: ",e);
212 throw new RuntimeException("Failed to get the current user " +
213 "while creating a FileContext", e);
214 }
215 /*
216 * Init the wd.
217 * WorkingDir is implemented at the FileContext layer
218 * NOT at the AbstractFileSystem layer.
219 * If the DefaultFS, such as localFilesystem has a notion of
220 * builtin WD, we use that as the initial WD.
221 * Otherwise the WD is initialized to the home directory.
222 */
223 workingDir = defaultFS.getInitialWorkingDirectory();
224 if (workingDir == null) {
225 workingDir = defaultFS.getHomeDirectory();
226 }
227 util = new Util(); // for the inner class
228 }
229
230 /*
231 * Remove relative part - return "absolute":
232 * If input is relative path ("foo/bar") add wd: ie "/<workingDir>/foo/bar"
233 * A fully qualified uri ("hdfs://nn:p/foo/bar") or a slash-relative path
234 * ("/foo/bar") are returned unchanged.
235 *
236 * Applications that use FileContext should use #makeQualified() since
237 * they really want a fully qualified URI.
238 * Hence this method is not called makeAbsolute() and
239 * has been deliberately declared private.
240 */
241 private Path fixRelativePart(Path p) {
242 if (p.isUriPathAbsolute()) {
243 return p;
244 } else {
245 return new Path(workingDir, p);
246 }
247 }
248
249 /**
250 * Delete all the paths that were marked as delete-on-exit.
251 */
252 static void processDeleteOnExit() {
253 synchronized (DELETE_ON_EXIT) {
254 Set<Entry<FileContext, Set<Path>>> set = DELETE_ON_EXIT.entrySet();
255 for (Entry<FileContext, Set<Path>> entry : set) {
256 FileContext fc = entry.getKey();
257 Set<Path> paths = entry.getValue();
258 for (Path path : paths) {
259 try {
260 fc.delete(path, true);
261 } catch (IOException e) {
262 LOG.warn("Ignoring failure to deleteOnExit for path " + path);
263 }
264 }
265 }
266 DELETE_ON_EXIT.clear();
267 }
268 }
269
270 /**
271 * Pathnames with scheme and relative path are illegal.
272 * @param path to be checked
273 */
274 private static void checkNotSchemeWithRelative(final Path path) {
275 if (path.toUri().isAbsolute() && !path.isUriPathAbsolute()) {
276 throw new HadoopIllegalArgumentException(
277 "Unsupported name: has scheme but relative path-part");
278 }
279 }
280
281 /**
282 * Get the file system of supplied path.
283 *
284 * @param absOrFqPath - absolute or fully qualified path
285 * @return the file system of the path
286 *
287 * @throws UnsupportedFileSystemException If the file system for
288 * <code>absOrFqPath</code> is not supported.
289 * @throws IOExcepton If the file system for <code>absOrFqPath</code> could
290 * not be instantiated.
291 */
292 private AbstractFileSystem getFSofPath(final Path absOrFqPath)
293 throws UnsupportedFileSystemException, IOException {
294 checkNotSchemeWithRelative(absOrFqPath);
295 if (!absOrFqPath.isAbsolute() && absOrFqPath.toUri().getScheme() == null) {
296 throw new HadoopIllegalArgumentException(
297 "FileContext Bug: path is relative");
298 }
299
300 try {
301 // Is it the default FS for this FileContext?
302 defaultFS.checkPath(absOrFqPath);
303 return defaultFS;
304 } catch (Exception e) { // it is different FileSystem
305 return getAbstractFileSystem(ugi, absOrFqPath.toUri(), conf);
306 }
307 }
308
309 private static AbstractFileSystem getAbstractFileSystem(
310 UserGroupInformation user, final URI uri, final Configuration conf)
311 throws UnsupportedFileSystemException, IOException {
312 try {
313 return user.doAs(new PrivilegedExceptionAction<AbstractFileSystem>() {
314 public AbstractFileSystem run() throws UnsupportedFileSystemException {
315 return AbstractFileSystem.get(uri, conf);
316 }
317 });
318 } catch (InterruptedException ex) {
319 LOG.error(ex);
320 throw new IOException("Failed to get the AbstractFileSystem for path: "
321 + uri, ex);
322 }
323 }
324
325 /**
326 * Protected Static Factory methods for getting a FileContexts
327 * that take a AbstractFileSystem as input. To be used for testing.
328 */
329
330 /**
331 * Create a FileContext with specified FS as default using the specified
332 * config.
333 *
334 * @param defFS
335 * @param aConf
336 * @return new FileContext with specifed FS as default.
337 */
338 public static FileContext getFileContext(final AbstractFileSystem defFS,
339 final Configuration aConf) {
340 return new FileContext(defFS, FsPermission.getUMask(aConf), aConf);
341 }
342
343 /**
344 * Create a FileContext for specified file system using the default config.
345 *
346 * @param defaultFS
347 * @return a FileContext with the specified AbstractFileSystem
348 * as the default FS.
349 */
350 protected static FileContext getFileContext(
351 final AbstractFileSystem defaultFS) {
352 return getFileContext(defaultFS, new Configuration());
353 }
354
355 /**
356 * Static Factory methods for getting a FileContext.
357 * Note new file contexts are created for each call.
358 * The only singleton is the local FS context using the default config.
359 *
360 * Methods that use the default config: the default config read from the
361 * $HADOOP_CONFIG/core.xml,
362 * Unspecified key-values for config are defaulted from core-defaults.xml
363 * in the release jar.
364 *
365 * The keys relevant to the FileContext layer are extracted at time of
366 * construction. Changes to the config after the call are ignore
367 * by the FileContext layer.
368 * The conf is passed to lower layers like AbstractFileSystem and HDFS which
369 * pick up their own config variables.
370 */
371
372 /**
373 * Create a FileContext using the default config read from the
374 * $HADOOP_CONFIG/core.xml, Unspecified key-values for config are defaulted
375 * from core-defaults.xml in the release jar.
376 *
377 * @throws UnsupportedFileSystemException If the file system from the default
378 * configuration is not supported
379 */
380 public static FileContext getFileContext()
381 throws UnsupportedFileSystemException {
382 return getFileContext(new Configuration());
383 }
384
385 /**
386 * @return a FileContext for the local file system using the default config.
387 * @throws UnsupportedFileSystemException If the file system for
388 * {@link FsConstants#LOCAL_FS_URI} is not supported.
389 */
390 public static FileContext getLocalFSFileContext()
391 throws UnsupportedFileSystemException {
392 return getFileContext(FsConstants.LOCAL_FS_URI);
393 }
394
395 /**
396 * Create a FileContext for specified URI using the default config.
397 *
398 * @param defaultFsUri
399 * @return a FileContext with the specified URI as the default FS.
400 *
401 * @throws UnsupportedFileSystemException If the file system for
402 * <code>defaultFsUri</code> is not supported
403 */
404 public static FileContext getFileContext(final URI defaultFsUri)
405 throws UnsupportedFileSystemException {
406 return getFileContext(defaultFsUri, new Configuration());
407 }
408
409 /**
410 * Create a FileContext for specified default URI using the specified config.
411 *
412 * @param defaultFsUri
413 * @param aConf
414 * @return new FileContext for specified uri
415 * @throws UnsupportedFileSystemException If the file system with specified is
416 * not supported
417 * @throws RuntimeException If the file system specified is supported but
418 * could not be instantiated, or if login fails.
419 */
420 public static FileContext getFileContext(final URI defaultFsUri,
421 final Configuration aConf) throws UnsupportedFileSystemException {
422 UserGroupInformation currentUser = null;
423 AbstractFileSystem defaultAfs = null;
424 try {
425 currentUser = UserGroupInformation.getCurrentUser();
426 defaultAfs = getAbstractFileSystem(currentUser, defaultFsUri, aConf);
427 } catch (UnsupportedFileSystemException ex) {
428 throw ex;
429 } catch (IOException ex) {
430 LOG.error(ex);
431 throw new RuntimeException(ex);
432 }
433 return getFileContext(defaultAfs, aConf);
434 }
435
436 /**
437 * Create a FileContext using the passed config. Generally it is better to use
438 * {@link #getFileContext(URI, Configuration)} instead of this one.
439 *
440 *
441 * @param aConf
442 * @return new FileContext
443 * @throws UnsupportedFileSystemException If file system in the config
444 * is not supported
445 */
446 public static FileContext getFileContext(final Configuration aConf)
447 throws UnsupportedFileSystemException {
448 return getFileContext(
449 URI.create(aConf.get(FS_DEFAULT_NAME_KEY, FS_DEFAULT_NAME_DEFAULT)),
450 aConf);
451 }
452
453 /**
454 * @param aConf - from which the FileContext is configured
455 * @return a FileContext for the local file system using the specified config.
456 *
457 * @throws UnsupportedFileSystemException If default file system in the config
458 * is not supported
459 *
460 */
461 public static FileContext getLocalFSFileContext(final Configuration aConf)
462 throws UnsupportedFileSystemException {
463 return getFileContext(FsConstants.LOCAL_FS_URI, aConf);
464 }
465
466 /* This method is needed for tests. */
467 @InterfaceAudience.Private
468 @InterfaceStability.Unstable /* return type will change to AFS once
469 HADOOP-6223 is completed */
470 public AbstractFileSystem getDefaultFileSystem() {
471 return defaultFS;
472 }
473
474 /**
475 * Set the working directory for wd-relative names (such a "foo/bar"). Working
476 * directory feature is provided by simply prefixing relative names with the
477 * working dir. Note this is different from Unix where the wd is actually set
478 * to the inode. Hence setWorkingDir does not follow symlinks etc. This works
479 * better in a distributed environment that has multiple independent roots.
480 * {@link #getWorkingDirectory()} should return what setWorkingDir() set.
481 *
482 * @param newWDir new working directory
483 * @throws IOException
484 * <br>
485 * NewWdir can be one of:
486 * <ul>
487 * <li>relative path: "foo/bar";</li>
488 * <li>absolute without scheme: "/foo/bar"</li>
489 * <li>fully qualified with scheme: "xx://auth/foo/bar"</li>
490 * </ul>
491 * <br>
492 * Illegal WDs:
493 * <ul>
494 * <li>relative with scheme: "xx:foo/bar"</li>
495 * <li>non existent directory</li>
496 * </ul>
497 */
498 public void setWorkingDirectory(final Path newWDir) throws IOException {
499 checkNotSchemeWithRelative(newWDir);
500 /* wd is stored as a fully qualified path. We check if the given
501 * path is not relative first since resolve requires and returns
502 * an absolute path.
503 */
504 final Path newWorkingDir = new Path(workingDir, newWDir);
505 FileStatus status = getFileStatus(newWorkingDir);
506 if (status.isFile()) {
507 throw new FileNotFoundException("Cannot setWD to a file");
508 }
509 workingDir = newWorkingDir;
510 }
511
512 /**
513 * Gets the working directory for wd-relative names (such a "foo/bar").
514 */
515 public Path getWorkingDirectory() {
516 return workingDir;
517 }
518
519 /**
520 * Gets the ugi in the file-context
521 * @return UserGroupInformation
522 */
523 public UserGroupInformation getUgi() {
524 return ugi;
525 }
526
527 /**
528 * Return the current user's home directory in this file system.
529 * The default implementation returns "/user/$USER/".
530 * @return the home directory
531 */
532 public Path getHomeDirectory() {
533 return defaultFS.getHomeDirectory();
534 }
535
536 /**
537 *
538 * @return the umask of this FileContext
539 */
540 public FsPermission getUMask() {
541 return umask;
542 }
543
544 /**
545 * Set umask to the supplied parameter.
546 * @param newUmask the new umask
547 */
548 public void setUMask(final FsPermission newUmask) {
549 umask = newUmask;
550 }
551
552
553 /**
554 * Resolve the path following any symlinks or mount points
555 * @param f to be resolved
556 * @return fully qualified resolved path
557 *
558 * @throws FileNotFoundException If <code>f</code> does not exist
559 * @throws AccessControlException if access denied
560 * @throws IOException If an IO Error occurred
561 *
562 * Exceptions applicable to file systems accessed over RPC:
563 * @throws RpcClientException If an exception occurred in the RPC client
564 * @throws RpcServerException If an exception occurred in the RPC server
565 * @throws UnexpectedServerException If server implementation throws
566 * undeclared exception to RPC server
567 *
568 * RuntimeExceptions:
569 * @throws InvalidPathException If path <code>f</code> is not valid
570 */
571 public Path resolvePath(final Path f) throws FileNotFoundException,
572 UnresolvedLinkException, AccessControlException, IOException {
573 return resolve(f);
574 }
575
576 /**
577 * Make the path fully qualified if it is isn't.
578 * A Fully-qualified path has scheme and authority specified and an absolute
579 * path.
580 * Use the default file system and working dir in this FileContext to qualify.
581 * @param path
582 * @return qualified path
583 */
584 public Path makeQualified(final Path path) {
585 return path.makeQualified(defaultFS.getUri(), getWorkingDirectory());
586 }
587
588 /**
589 * Create or overwrite file on indicated path and returns an output stream for
590 * writing into the file.
591 *
592 * @param f the file name to open
593 * @param createFlag gives the semantics of create; see {@link CreateFlag}
594 * @param opts file creation options; see {@link Options.CreateOpts}.
595 * <ul>
596 * <li>Progress - to report progress on the operation - default null
597 * <li>Permission - umask is applied against permisssion: default is
598 * FsPermissions:getDefault()
599 *
600 * <li>CreateParent - create missing parent path; default is to not
601 * to create parents
602 * <li>The defaults for the following are SS defaults of the file
603 * server implementing the target path. Not all parameters make sense
604 * for all kinds of file system - eg. localFS ignores Blocksize,
605 * replication, checksum
606 * <ul>
607 * <li>BufferSize - buffersize used in FSDataOutputStream
608 * <li>Blocksize - block size for file blocks
609 * <li>ReplicationFactor - replication for blocks
610 * <li>BytesPerChecksum - bytes per checksum
611 * </ul>
612 * </ul>
613 *
614 * @return {@link FSDataOutputStream} for created file
615 *
616 * @throws AccessControlException If access is denied
617 * @throws FileAlreadyExistsException If file <code>f</code> already exists
618 * @throws FileNotFoundException If parent of <code>f</code> does not exist
619 * and <code>createParent</code> is false
620 * @throws ParentNotDirectoryException If parent of <code>f</code> is not a
621 * directory.
622 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
623 * not supported
624 * @throws IOException If an I/O error occurred
625 *
626 * Exceptions applicable to file systems accessed over RPC:
627 * @throws RpcClientException If an exception occurred in the RPC client
628 * @throws RpcServerException If an exception occurred in the RPC server
629 * @throws UnexpectedServerException If server implementation throws
630 * undeclared exception to RPC server
631 *
632 * RuntimeExceptions:
633 * @throws InvalidPathException If path <code>f</code> is not valid
634 */
635 public FSDataOutputStream create(final Path f,
636 final EnumSet<CreateFlag> createFlag, Options.CreateOpts... opts)
637 throws AccessControlException, FileAlreadyExistsException,
638 FileNotFoundException, ParentNotDirectoryException,
639 UnsupportedFileSystemException, IOException {
640 Path absF = fixRelativePart(f);
641
642 // If one of the options is a permission, extract it & apply umask
643 // If not, add a default Perms and apply umask;
644 // AbstractFileSystem#create
645
646 CreateOpts.Perms permOpt =
647 (CreateOpts.Perms) CreateOpts.getOpt(CreateOpts.Perms.class, opts);
648 FsPermission permission = (permOpt != null) ? permOpt.getValue() :
649 FsPermission.getDefault();
650 permission = permission.applyUMask(umask);
651
652 final CreateOpts[] updatedOpts =
653 CreateOpts.setOpt(CreateOpts.perms(permission), opts);
654 return new FSLinkResolver<FSDataOutputStream>() {
655 public FSDataOutputStream next(final AbstractFileSystem fs, final Path p)
656 throws IOException {
657 return fs.create(p, createFlag, updatedOpts);
658 }
659 }.resolve(this, absF);
660 }
661
662 /**
663 * Make(create) a directory and all the non-existent parents.
664 *
665 * @param dir - the dir to make
666 * @param permission - permissions is set permission&~umask
667 * @param createParent - if true then missing parent dirs are created if false
668 * then parent must exist
669 *
670 * @throws AccessControlException If access is denied
671 * @throws FileAlreadyExistsException If directory <code>dir</code> already
672 * exists
673 * @throws FileNotFoundException If parent of <code>dir</code> does not exist
674 * and <code>createParent</code> is false
675 * @throws ParentNotDirectoryException If parent of <code>dir</code> is not a
676 * directory
677 * @throws UnsupportedFileSystemException If file system for <code>dir</code>
678 * is not supported
679 * @throws IOException If an I/O error occurred
680 *
681 * Exceptions applicable to file systems accessed over RPC:
682 * @throws RpcClientException If an exception occurred in the RPC client
683 * @throws UnexpectedServerException If server implementation throws
684 * undeclared exception to RPC server
685 *
686 * RuntimeExceptions:
687 * @throws InvalidPathException If path <code>dir</code> is not valid
688 */
689 public void mkdir(final Path dir, final FsPermission permission,
690 final boolean createParent) throws AccessControlException,
691 FileAlreadyExistsException, FileNotFoundException,
692 ParentNotDirectoryException, UnsupportedFileSystemException,
693 IOException {
694 final Path absDir = fixRelativePart(dir);
695 final FsPermission absFerms = (permission == null ?
696 FsPermission.getDefault() : permission).applyUMask(umask);
697 new FSLinkResolver<Void>() {
698 public Void next(final AbstractFileSystem fs, final Path p)
699 throws IOException, UnresolvedLinkException {
700 fs.mkdir(p, absFerms, createParent);
701 return null;
702 }
703 }.resolve(this, absDir);
704 }
705
706 /**
707 * Delete a file.
708 * @param f the path to delete.
709 * @param recursive if path is a directory and set to
710 * true, the directory is deleted else throws an exception. In
711 * case of a file the recursive can be set to either true or false.
712 *
713 * @throws AccessControlException If access is denied
714 * @throws FileNotFoundException If <code>f</code> does not exist
715 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
716 * not supported
717 * @throws IOException If an I/O error occurred
718 *
719 * Exceptions applicable to file systems accessed over RPC:
720 * @throws RpcClientException If an exception occurred in the RPC client
721 * @throws RpcServerException If an exception occurred in the RPC server
722 * @throws UnexpectedServerException If server implementation throws
723 * undeclared exception to RPC server
724 *
725 * RuntimeExceptions:
726 * @throws InvalidPathException If path <code>f</code> is invalid
727 */
728 public boolean delete(final Path f, final boolean recursive)
729 throws AccessControlException, FileNotFoundException,
730 UnsupportedFileSystemException, IOException {
731 Path absF = fixRelativePart(f);
732 return new FSLinkResolver<Boolean>() {
733 public Boolean next(final AbstractFileSystem fs, final Path p)
734 throws IOException, UnresolvedLinkException {
735 return Boolean.valueOf(fs.delete(p, recursive));
736 }
737 }.resolve(this, absF);
738 }
739
740 /**
741 * Opens an FSDataInputStream at the indicated Path using
742 * default buffersize.
743 * @param f the file name to open
744 *
745 * @throws AccessControlException If access is denied
746 * @throws FileNotFoundException If file <code>f</code> does not exist
747 * @throws UnsupportedFileSystemException If file system for <code>f</code>
748 * is not supported
749 * @throws IOException If an I/O error occurred
750 *
751 * Exceptions applicable to file systems accessed over RPC:
752 * @throws RpcClientException If an exception occurred in the RPC client
753 * @throws RpcServerException If an exception occurred in the RPC server
754 * @throws UnexpectedServerException If server implementation throws
755 * undeclared exception to RPC server
756 */
757 public FSDataInputStream open(final Path f) throws AccessControlException,
758 FileNotFoundException, UnsupportedFileSystemException, IOException {
759 final Path absF = fixRelativePart(f);
760 return new FSLinkResolver<FSDataInputStream>() {
761 public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
762 throws IOException, UnresolvedLinkException {
763 return fs.open(p);
764 }
765 }.resolve(this, absF);
766 }
767
768 /**
769 * Opens an FSDataInputStream at the indicated Path.
770 *
771 * @param f the file name to open
772 * @param bufferSize the size of the buffer to be used.
773 *
774 * @throws AccessControlException If access is denied
775 * @throws FileNotFoundException If file <code>f</code> does not exist
776 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
777 * not supported
778 * @throws IOException If an I/O error occurred
779 *
780 * Exceptions applicable to file systems accessed over RPC:
781 * @throws RpcClientException If an exception occurred in the RPC client
782 * @throws RpcServerException If an exception occurred in the RPC server
783 * @throws UnexpectedServerException If server implementation throws
784 * undeclared exception to RPC server
785 */
786 public FSDataInputStream open(final Path f, final int bufferSize)
787 throws AccessControlException, FileNotFoundException,
788 UnsupportedFileSystemException, IOException {
789 final Path absF = fixRelativePart(f);
790 return new FSLinkResolver<FSDataInputStream>() {
791 public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
792 throws IOException, UnresolvedLinkException {
793 return fs.open(p, bufferSize);
794 }
795 }.resolve(this, absF);
796 }
797
798 /**
799 * Set replication for an existing file.
800 *
801 * @param f file name
802 * @param replication new replication
803 *
804 * @return true if successful
805 *
806 * @throws AccessControlException If access is denied
807 * @throws FileNotFoundException If file <code>f</code> does not exist
808 * @throws IOException If an I/O error occurred
809 *
810 * Exceptions applicable to file systems accessed over RPC:
811 * @throws RpcClientException If an exception occurred in the RPC client
812 * @throws RpcServerException If an exception occurred in the RPC server
813 * @throws UnexpectedServerException If server implementation throws
814 * undeclared exception to RPC server
815 */
816 public boolean setReplication(final Path f, final short replication)
817 throws AccessControlException, FileNotFoundException,
818 IOException {
819 final Path absF = fixRelativePart(f);
820 return new FSLinkResolver<Boolean>() {
821 public Boolean next(final AbstractFileSystem fs, final Path p)
822 throws IOException, UnresolvedLinkException {
823 return Boolean.valueOf(fs.setReplication(p, replication));
824 }
825 }.resolve(this, absF);
826 }
827
828 /**
829 * Renames Path src to Path dst
830 * <ul>
831 * <li
832 * <li>Fails if src is a file and dst is a directory.
833 * <li>Fails if src is a directory and dst is a file.
834 * <li>Fails if the parent of dst does not exist or is a file.
835 * </ul>
836 * <p>
837 * If OVERWRITE option is not passed as an argument, rename fails if the dst
838 * already exists.
839 * <p>
840 * If OVERWRITE option is passed as an argument, rename overwrites the dst if
841 * it is a file or an empty directory. Rename fails if dst is a non-empty
842 * directory.
843 * <p>
844 * Note that atomicity of rename is dependent on the file system
845 * implementation. Please refer to the file system documentation for details
846 * <p>
847 *
848 * @param src path to be renamed
849 * @param dst new path after rename
850 *
851 * @throws AccessControlException If access is denied
852 * @throws FileAlreadyExistsException If <code>dst</code> already exists and
853 * <code>options</options> has {@link Options.Rename#OVERWRITE}
854 * option false.
855 * @throws FileNotFoundException If <code>src</code> does not exist
856 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not a
857 * directory
858 * @throws UnsupportedFileSystemException If file system for <code>src</code>
859 * and <code>dst</code> is not supported
860 * @throws IOException If an I/O error occurred
861 *
862 * Exceptions applicable to file systems accessed over RPC:
863 * @throws RpcClientException If an exception occurred in the RPC client
864 * @throws RpcServerException If an exception occurred in the RPC server
865 * @throws UnexpectedServerException If server implementation throws
866 * undeclared exception to RPC server
867 */
868 public void rename(final Path src, final Path dst,
869 final Options.Rename... options) throws AccessControlException,
870 FileAlreadyExistsException, FileNotFoundException,
871 ParentNotDirectoryException, UnsupportedFileSystemException,
872 IOException {
873 final Path absSrc = fixRelativePart(src);
874 final Path absDst = fixRelativePart(dst);
875 AbstractFileSystem srcFS = getFSofPath(absSrc);
876 AbstractFileSystem dstFS = getFSofPath(absDst);
877 if(!srcFS.getUri().equals(dstFS.getUri())) {
878 throw new IOException("Renames across AbstractFileSystems not supported");
879 }
880 try {
881 srcFS.rename(absSrc, absDst, options);
882 } catch (UnresolvedLinkException e) {
883 /* We do not know whether the source or the destination path
884 * was unresolved. Resolve the source path up until the final
885 * path component, then fully resolve the destination.
886 */
887 final Path source = resolveIntermediate(absSrc);
888 new FSLinkResolver<Void>() {
889 public Void next(final AbstractFileSystem fs, final Path p)
890 throws IOException, UnresolvedLinkException {
891 fs.rename(source, p, options);
892 return null;
893 }
894 }.resolve(this, absDst);
895 }
896 }
897
898 /**
899 * Set permission of a path.
900 * @param f
901 * @param permission - the new absolute permission (umask is not applied)
902 *
903 * @throws AccessControlException If access is denied
904 * @throws FileNotFoundException If <code>f</code> does not exist
905 * @throws UnsupportedFileSystemException If file system for <code>f</code>
906 * is not supported
907 * @throws IOException If an I/O error occurred
908 *
909 * Exceptions applicable to file systems accessed over RPC:
910 * @throws RpcClientException If an exception occurred in the RPC client
911 * @throws RpcServerException If an exception occurred in the RPC server
912 * @throws UnexpectedServerException If server implementation throws
913 * undeclared exception to RPC server
914 */
915 public void setPermission(final Path f, final FsPermission permission)
916 throws AccessControlException, FileNotFoundException,
917 UnsupportedFileSystemException, IOException {
918 final Path absF = fixRelativePart(f);
919 new FSLinkResolver<Void>() {
920 public Void next(final AbstractFileSystem fs, final Path p)
921 throws IOException, UnresolvedLinkException {
922 fs.setPermission(p, permission);
923 return null;
924 }
925 }.resolve(this, absF);
926 }
927
928 /**
929 * Set owner of a path (i.e. a file or a directory). The parameters username
930 * and groupname cannot both be null.
931 *
932 * @param f The path
933 * @param username If it is null, the original username remains unchanged.
934 * @param groupname If it is null, the original groupname remains unchanged.
935 *
936 * @throws AccessControlException If access is denied
937 * @throws FileNotFoundException If <code>f</code> does not exist
938 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
939 * not supported
940 * @throws IOException If an I/O error occurred
941 *
942 * Exceptions applicable to file systems accessed over RPC:
943 * @throws RpcClientException If an exception occurred in the RPC client
944 * @throws RpcServerException If an exception occurred in the RPC server
945 * @throws UnexpectedServerException If server implementation throws
946 * undeclared exception to RPC server
947 *
948 * RuntimeExceptions:
949 * @throws HadoopIllegalArgumentException If <code>username</code> or
950 * <code>groupname</code> is invalid.
951 */
952 public void setOwner(final Path f, final String username,
953 final String groupname) throws AccessControlException,
954 UnsupportedFileSystemException, FileNotFoundException,
955 IOException {
956 if ((username == null) && (groupname == null)) {
957 throw new HadoopIllegalArgumentException(
958 "username and groupname cannot both be null");
959 }
960 final Path absF = fixRelativePart(f);
961 new FSLinkResolver<Void>() {
962 public Void next(final AbstractFileSystem fs, final Path p)
963 throws IOException, UnresolvedLinkException {
964 fs.setOwner(p, username, groupname);
965 return null;
966 }
967 }.resolve(this, absF);
968 }
969
970 /**
971 * Set access time of a file.
972 * @param f The path
973 * @param mtime Set the modification time of this file.
974 * The number of milliseconds since epoch (Jan 1, 1970).
975 * A value of -1 means that this call should not set modification time.
976 * @param atime Set the access time of this file.
977 * The number of milliseconds since Jan 1, 1970.
978 * A value of -1 means that this call should not set access time.
979 *
980 * @throws AccessControlException If access is denied
981 * @throws FileNotFoundException If <code>f</code> does not exist
982 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
983 * not supported
984 * @throws IOException If an I/O error occurred
985 *
986 * Exceptions applicable to file systems accessed over RPC:
987 * @throws RpcClientException If an exception occurred in the RPC client
988 * @throws RpcServerException If an exception occurred in the RPC server
989 * @throws UnexpectedServerException If server implementation throws
990 * undeclared exception to RPC server
991 */
992 public void setTimes(final Path f, final long mtime, final long atime)
993 throws AccessControlException, FileNotFoundException,
994 UnsupportedFileSystemException, IOException {
995 final Path absF = fixRelativePart(f);
996 new FSLinkResolver<Void>() {
997 public Void next(final AbstractFileSystem fs, final Path p)
998 throws IOException, UnresolvedLinkException {
999 fs.setTimes(p, mtime, atime);
1000 return null;
1001 }
1002 }.resolve(this, absF);
1003 }
1004
1005 /**
1006 * Get the checksum of a file.
1007 *
1008 * @param f file path
1009 *
1010 * @return The file checksum. The default return value is null,
1011 * which indicates that no checksum algorithm is implemented
1012 * in the corresponding FileSystem.
1013 *
1014 * @throws AccessControlException If access is denied
1015 * @throws FileNotFoundException If <code>f</code> does not exist
1016 * @throws IOException If an I/O error occurred
1017 *
1018 * Exceptions applicable to file systems accessed over RPC:
1019 * @throws RpcClientException If an exception occurred in the RPC client
1020 * @throws RpcServerException If an exception occurred in the RPC server
1021 * @throws UnexpectedServerException If server implementation throws
1022 * undeclared exception to RPC server
1023 */
1024 public FileChecksum getFileChecksum(final Path f)
1025 throws AccessControlException, FileNotFoundException,
1026 IOException {
1027 final Path absF = fixRelativePart(f);
1028 return new FSLinkResolver<FileChecksum>() {
1029 public FileChecksum next(final AbstractFileSystem fs, final Path p)
1030 throws IOException, UnresolvedLinkException {
1031 return fs.getFileChecksum(p);
1032 }
1033 }.resolve(this, absF);
1034 }
1035
1036 /**
1037 * Set the verify checksum flag for the file system denoted by the path.
1038 * This is only applicable if the
1039 * corresponding FileSystem supports checksum. By default doesn't do anything.
1040 * @param verifyChecksum
1041 * @param f set the verifyChecksum for the Filesystem containing this path
1042 *
1043 * @throws AccessControlException If access is denied
1044 * @throws FileNotFoundException If <code>f</code> does not exist
1045 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1046 * not supported
1047 * @throws IOException If an I/O error occurred
1048 *
1049 * Exceptions applicable to file systems accessed over RPC:
1050 * @throws RpcClientException If an exception occurred in the RPC client
1051 * @throws RpcServerException If an exception occurred in the RPC server
1052 * @throws UnexpectedServerException If server implementation throws
1053 * undeclared exception to RPC server
1054 */
1055 public void setVerifyChecksum(final boolean verifyChecksum, final Path f)
1056 throws AccessControlException, FileNotFoundException,
1057 UnsupportedFileSystemException, IOException {
1058 final Path absF = resolve(fixRelativePart(f));
1059 getFSofPath(absF).setVerifyChecksum(verifyChecksum);
1060 }
1061
1062 /**
1063 * Return a file status object that represents the path.
1064 * @param f The path we want information from
1065 *
1066 * @return a FileStatus object
1067 *
1068 * @throws AccessControlException If access is denied
1069 * @throws FileNotFoundException If <code>f</code> does not exist
1070 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1071 * not supported
1072 * @throws IOException If an I/O error occurred
1073 *
1074 * Exceptions applicable to file systems accessed over RPC:
1075 * @throws RpcClientException If an exception occurred in the RPC client
1076 * @throws RpcServerException If an exception occurred in the RPC server
1077 * @throws UnexpectedServerException If server implementation throws
1078 * undeclared exception to RPC server
1079 */
1080 public FileStatus getFileStatus(final Path f) throws AccessControlException,
1081 FileNotFoundException, UnsupportedFileSystemException, IOException {
1082 final Path absF = fixRelativePart(f);
1083 return new FSLinkResolver<FileStatus>() {
1084 public FileStatus next(final AbstractFileSystem fs, final Path p)
1085 throws IOException, UnresolvedLinkException {
1086 return fs.getFileStatus(p);
1087 }
1088 }.resolve(this, absF);
1089 }
1090
1091 /**
1092 * Return a fully qualified version of the given symlink target if it
1093 * has no scheme and authority. Partially and fully qualified paths
1094 * are returned unmodified.
1095 * @param pathFS The AbstractFileSystem of the path
1096 * @param pathWithLink Path that contains the symlink
1097 * @param target The symlink's absolute target
1098 * @return Fully qualified version of the target.
1099 */
1100 private Path qualifySymlinkTarget(final AbstractFileSystem pathFS,
1101 Path pathWithLink, Path target) {
1102 // NB: makeQualified uses the target's scheme and authority, if
1103 // specified, and the scheme and authority of pathFS, if not.
1104 final String scheme = target.toUri().getScheme();
1105 final String auth = target.toUri().getAuthority();
1106 return (scheme == null && auth == null)
1107 ? target.makeQualified(pathFS.getUri(), pathWithLink.getParent())
1108 : target;
1109 }
1110
1111 /**
1112 * Return a file status object that represents the path. If the path
1113 * refers to a symlink then the FileStatus of the symlink is returned.
1114 * The behavior is equivalent to #getFileStatus() if the underlying
1115 * file system does not support symbolic links.
1116 * @param f The path we want information from.
1117 * @return A FileStatus object
1118 *
1119 * @throws AccessControlException If access is denied
1120 * @throws FileNotFoundException If <code>f</code> does not exist
1121 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1122 * not supported
1123 * @throws IOException If an I/O error occurred
1124 */
1125 public FileStatus getFileLinkStatus(final Path f)
1126 throws AccessControlException, FileNotFoundException,
1127 UnsupportedFileSystemException, IOException {
1128 final Path absF = fixRelativePart(f);
1129 return new FSLinkResolver<FileStatus>() {
1130 public FileStatus next(final AbstractFileSystem fs, final Path p)
1131 throws IOException, UnresolvedLinkException {
1132 FileStatus fi = fs.getFileLinkStatus(p);
1133 if (fi.isSymlink()) {
1134 fi.setSymlink(qualifySymlinkTarget(fs, p, fi.getSymlink()));
1135 }
1136 return fi;
1137 }
1138 }.resolve(this, absF);
1139 }
1140
1141 /**
1142 * Returns the target of the given symbolic link as it was specified
1143 * when the link was created. Links in the path leading up to the
1144 * final path component are resolved transparently.
1145 *
1146 * @param f the path to return the target of
1147 * @return The un-interpreted target of the symbolic link.
1148 *
1149 * @throws AccessControlException If access is denied
1150 * @throws FileNotFoundException If path <code>f</code> does not exist
1151 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1152 * not supported
1153 * @throws IOException If the given path does not refer to a symlink
1154 * or an I/O error occurred
1155 */
1156 public Path getLinkTarget(final Path f) throws AccessControlException,
1157 FileNotFoundException, UnsupportedFileSystemException, IOException {
1158 final Path absF = fixRelativePart(f);
1159 return new FSLinkResolver<Path>() {
1160 public Path next(final AbstractFileSystem fs, final Path p)
1161 throws IOException, UnresolvedLinkException {
1162 FileStatus fi = fs.getFileLinkStatus(p);
1163 return fi.getSymlink();
1164 }
1165 }.resolve(this, absF);
1166 }
1167
1168 /**
1169 * Return blockLocation of the given file for the given offset and len.
1170 * For a nonexistent file or regions, null will be returned.
1171 *
1172 * This call is most helpful with DFS, where it returns
1173 * hostnames of machines that contain the given file.
1174 *
1175 * @param f - get blocklocations of this file
1176 * @param start position (byte offset)
1177 * @param len (in bytes)
1178 *
1179 * @return block locations for given file at specified offset of len
1180 *
1181 * @throws AccessControlException If access is denied
1182 * @throws FileNotFoundException If <code>f</code> does not exist
1183 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1184 * not supported
1185 * @throws IOException If an I/O error occurred
1186 *
1187 * Exceptions applicable to file systems accessed over RPC:
1188 * @throws RpcClientException If an exception occurred in the RPC client
1189 * @throws RpcServerException If an exception occurred in the RPC server
1190 * @throws UnexpectedServerException If server implementation throws
1191 * undeclared exception to RPC server
1192 *
1193 * RuntimeExceptions:
1194 * @throws InvalidPathException If path <code>f</code> is invalid
1195 */
1196 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
1197 @InterfaceStability.Evolving
1198 public BlockLocation[] getFileBlockLocations(final Path f, final long start,
1199 final long len) throws AccessControlException, FileNotFoundException,
1200 UnsupportedFileSystemException, IOException {
1201 final Path absF = fixRelativePart(f);
1202 return new FSLinkResolver<BlockLocation[]>() {
1203 public BlockLocation[] next(final AbstractFileSystem fs, final Path p)
1204 throws IOException, UnresolvedLinkException {
1205 return fs.getFileBlockLocations(p, start, len);
1206 }
1207 }.resolve(this, absF);
1208 }
1209
1210 /**
1211 * Returns a status object describing the use and capacity of the
1212 * file system denoted by the Parh argument p.
1213 * If the file system has multiple partitions, the
1214 * use and capacity of the partition pointed to by the specified
1215 * path is reflected.
1216 *
1217 * @param f Path for which status should be obtained. null means the
1218 * root partition of the default file system.
1219 *
1220 * @return a FsStatus object
1221 *
1222 * @throws AccessControlException If access is denied
1223 * @throws FileNotFoundException If <code>f</code> does not exist
1224 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1225 * not supported
1226 * @throws IOException If an I/O error occurred
1227 *
1228 * Exceptions applicable to file systems accessed over RPC:
1229 * @throws RpcClientException If an exception occurred in the RPC client
1230 * @throws RpcServerException If an exception occurred in the RPC server
1231 * @throws UnexpectedServerException If server implementation throws
1232 * undeclared exception to RPC server
1233 */
1234 public FsStatus getFsStatus(final Path f) throws AccessControlException,
1235 FileNotFoundException, UnsupportedFileSystemException, IOException {
1236 if (f == null) {
1237 return defaultFS.getFsStatus();
1238 }
1239 final Path absF = fixRelativePart(f);
1240 return new FSLinkResolver<FsStatus>() {
1241 public FsStatus next(final AbstractFileSystem fs, final Path p)
1242 throws IOException, UnresolvedLinkException {
1243 return fs.getFsStatus(p);
1244 }
1245 }.resolve(this, absF);
1246 }
1247
1248 /**
1249 * Creates a symbolic link to an existing file. An exception is thrown if
1250 * the symlink exits, the user does not have permission to create symlink,
1251 * or the underlying file system does not support symlinks.
1252 *
1253 * Symlink permissions are ignored, access to a symlink is determined by
1254 * the permissions of the symlink target.
1255 *
1256 * Symlinks in paths leading up to the final path component are resolved
1257 * transparently. If the final path component refers to a symlink some
1258 * functions operate on the symlink itself, these are:
1259 * - delete(f) and deleteOnExit(f) - Deletes the symlink.
1260 * - rename(src, dst) - If src refers to a symlink, the symlink is
1261 * renamed. If dst refers to a symlink, the symlink is over-written.
1262 * - getLinkTarget(f) - Returns the target of the symlink.
1263 * - getFileLinkStatus(f) - Returns a FileStatus object describing
1264 * the symlink.
1265 * Some functions, create() and mkdir(), expect the final path component
1266 * does not exist. If they are given a path that refers to a symlink that
1267 * does exist they behave as if the path referred to an existing file or
1268 * directory. All other functions fully resolve, ie follow, the symlink.
1269 * These are: open, setReplication, setOwner, setTimes, setWorkingDirectory,
1270 * setPermission, getFileChecksum, setVerifyChecksum, getFileBlockLocations,
1271 * getFsStatus, getFileStatus, exists, and listStatus.
1272 *
1273 * Symlink targets are stored as given to createSymlink, assuming the
1274 * underlying file system is capable of storing a fully qualified URI.
1275 * Dangling symlinks are permitted. FileContext supports four types of
1276 * symlink targets, and resolves them as follows
1277 * <pre>
1278 * Given a path referring to a symlink of form:
1279 *
1280 * <---X--->
1281 * fs://host/A/B/link
1282 * <-----Y----->
1283 *
1284 * In this path X is the scheme and authority that identify the file system,
1285 * and Y is the path leading up to the final path component "link". If Y is
1286 * a symlink itself then let Y' be the target of Y and X' be the scheme and
1287 * authority of Y'. Symlink targets may:
1288 *
1289 * 1. Fully qualified URIs
1290 *
1291 * fs://hostX/A/B/file Resolved according to the target file system.
1292 *
1293 * 2. Partially qualified URIs (eg scheme but no host)
1294 *
1295 * fs:///A/B/file Resolved according to the target file sytem. Eg resolving
1296 * a symlink to hdfs:///A results in an exception because
1297 * HDFS URIs must be fully qualified, while a symlink to
1298 * file:///A will not since Hadoop's local file systems
1299 * require partially qualified URIs.
1300 *
1301 * 3. Relative paths
1302 *
1303 * path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
1304 * is "../B/file" then [Y'][path] is hdfs://host/B/file
1305 *
1306 * 4. Absolute paths
1307 *
1308 * path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path
1309 * is "/file" then [X][path] is hdfs://host/file
1310 * </pre>
1311 *
1312 * @param target the target of the symbolic link
1313 * @param link the path to be created that points to target
1314 * @param createParent if true then missing parent dirs are created if
1315 * false then parent must exist
1316 *
1317 *
1318 * @throws AccessControlException If access is denied
1319 * @throws FileAlreadyExistsException If file <code>linkcode> already exists
1320 * @throws FileNotFoundException If <code>target</code> does not exist
1321 * @throws ParentNotDirectoryException If parent of <code>link</code> is not a
1322 * directory.
1323 * @throws UnsupportedFileSystemException If file system for
1324 * <code>target</code> or <code>link</code> is not supported
1325 * @throws IOException If an I/O error occurred
1326 */
1327 public void createSymlink(final Path target, final Path link,
1328 final boolean createParent) throws AccessControlException,
1329 FileAlreadyExistsException, FileNotFoundException,
1330 ParentNotDirectoryException, UnsupportedFileSystemException,
1331 IOException {
1332 final Path nonRelLink = fixRelativePart(link);
1333 new FSLinkResolver<Void>() {
1334 public Void next(final AbstractFileSystem fs, final Path p)
1335 throws IOException, UnresolvedLinkException {
1336 fs.createSymlink(target, p, createParent);
1337 return null;
1338 }
1339 }.resolve(this, nonRelLink);
1340 }
1341
1342 /**
1343 * List the statuses of the files/directories in the given path if the path is
1344 * a directory.
1345 *
1346 * @param f is the path
1347 *
1348 * @return an iterator that traverses statuses of the files/directories
1349 * in the given path
1350 *
1351 * @throws AccessControlException If access is denied
1352 * @throws FileNotFoundException If <code>f</code> does not exist
1353 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1354 * not supported
1355 * @throws IOException If an I/O error occurred
1356 *
1357 * Exceptions applicable to file systems accessed over RPC:
1358 * @throws RpcClientException If an exception occurred in the RPC client
1359 * @throws RpcServerException If an exception occurred in the RPC server
1360 * @throws UnexpectedServerException If server implementation throws
1361 * undeclared exception to RPC server
1362 */
1363 public RemoteIterator<FileStatus> listStatus(final Path f) throws
1364 AccessControlException, FileNotFoundException,
1365 UnsupportedFileSystemException, IOException {
1366 final Path absF = fixRelativePart(f);
1367 return new FSLinkResolver<RemoteIterator<FileStatus>>() {
1368 public RemoteIterator<FileStatus> next(
1369 final AbstractFileSystem fs, final Path p)
1370 throws IOException, UnresolvedLinkException {
1371 return fs.listStatusIterator(p);
1372 }
1373 }.resolve(this, absF);
1374 }
1375
1376 /**
1377 * @return an iterator over the corrupt files under the given path
1378 * (may contain duplicates if a file has more than one corrupt block)
1379 * @throws IOException
1380 */
1381 public RemoteIterator<Path> listCorruptFileBlocks(Path path)
1382 throws IOException {
1383 final Path absF = fixRelativePart(path);
1384 return new FSLinkResolver<RemoteIterator<Path>>() {
1385 @Override
1386 public RemoteIterator<Path> next(final AbstractFileSystem fs,
1387 final Path p)
1388 throws IOException, UnresolvedLinkException {
1389 return fs.listCorruptFileBlocks(p);
1390 }
1391 }.resolve(this, absF);
1392 }
1393
1394 /**
1395 * List the statuses of the files/directories in the given path if the path is
1396 * a directory.
1397 * Return the file's status and block locations If the path is a file.
1398 *
1399 * If a returned status is a file, it contains the file's block locations.
1400 *
1401 * @param f is the path
1402 *
1403 * @return an iterator that traverses statuses of the files/directories
1404 * in the given path
1405 * If any IO exception (for example the input directory gets deleted while
1406 * listing is being executed), next() or hasNext() of the returned iterator
1407 * may throw a RuntimeException with the io exception as the cause.
1408 *
1409 * @throws AccessControlException If access is denied
1410 * @throws FileNotFoundException If <code>f</code> does not exist
1411 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1412 * not supported
1413 * @throws IOException If an I/O error occurred
1414 *
1415 * Exceptions applicable to file systems accessed over RPC:
1416 * @throws RpcClientException If an exception occurred in the RPC client
1417 * @throws RpcServerException If an exception occurred in the RPC server
1418 * @throws UnexpectedServerException If server implementation throws
1419 * undeclared exception to RPC server
1420 */
1421 public RemoteIterator<LocatedFileStatus> listLocatedStatus(
1422 final Path f) throws
1423 AccessControlException, FileNotFoundException,
1424 UnsupportedFileSystemException, IOException {
1425 final Path absF = fixRelativePart(f);
1426 return new FSLinkResolver<RemoteIterator<LocatedFileStatus>>() {
1427 public RemoteIterator<LocatedFileStatus> next(
1428 final AbstractFileSystem fs, final Path p)
1429 throws IOException, UnresolvedLinkException {
1430 return fs.listLocatedStatus(p);
1431 }
1432 }.resolve(this, absF);
1433 }
1434
1435 /**
1436 * Mark a path to be deleted on JVM shutdown.
1437 *
1438 * @param f the existing path to delete.
1439 *
1440 * @return true if deleteOnExit is successful, otherwise false.
1441 *
1442 * @throws AccessControlException If access is denied
1443 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1444 * not supported
1445 * @throws IOException If an I/O error occurred
1446 *
1447 * Exceptions applicable to file systems accessed over RPC:
1448 * @throws RpcClientException If an exception occurred in the RPC client
1449 * @throws RpcServerException If an exception occurred in the RPC server
1450 * @throws UnexpectedServerException If server implementation throws
1451 * undeclared exception to RPC server
1452 */
1453 public boolean deleteOnExit(Path f) throws AccessControlException,
1454 IOException {
1455 if (!this.util().exists(f)) {
1456 return false;
1457 }
1458 synchronized (DELETE_ON_EXIT) {
1459 if (DELETE_ON_EXIT.isEmpty() && !FINALIZER.isAlive()) {
1460 Runtime.getRuntime().addShutdownHook(FINALIZER);
1461 }
1462
1463 Set<Path> set = DELETE_ON_EXIT.get(this);
1464 if (set == null) {
1465 set = new TreeSet<Path>();
1466 DELETE_ON_EXIT.put(this, set);
1467 }
1468 set.add(f);
1469 }
1470 return true;
1471 }
1472
1473 private final Util util;
1474 public Util util() {
1475 return util;
1476 }
1477
1478
1479 /**
1480 * Utility/library methods built over the basic FileContext methods.
1481 * Since this are library functions, the oprtation are not atomic
1482 * and some of them may partially complete if other threads are making
1483 * changes to the same part of the name space.
1484 */
1485 public class Util {
1486 /**
1487 * Does the file exist?
1488 * Note: Avoid using this method if you already have FileStatus in hand.
1489 * Instead reuse the FileStatus
1490 * @param f the file or dir to be checked
1491 *
1492 * @throws AccessControlException If access is denied
1493 * @throws IOException If an I/O error occurred
1494 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1495 * not supported
1496 *
1497 * Exceptions applicable to file systems accessed over RPC:
1498 * @throws RpcClientException If an exception occurred in the RPC client
1499 * @throws RpcServerException If an exception occurred in the RPC server
1500 * @throws UnexpectedServerException If server implementation throws
1501 * undeclared exception to RPC server
1502 */
1503 public boolean exists(final Path f) throws AccessControlException,
1504 UnsupportedFileSystemException, IOException {
1505 try {
1506 FileStatus fs = FileContext.this.getFileStatus(f);
1507 assert fs != null;
1508 return true;
1509 } catch (FileNotFoundException e) {
1510 return false;
1511 }
1512 }
1513
1514 /**
1515 * Return a list of file status objects that corresponds to supplied paths
1516 * excluding those non-existent paths.
1517 *
1518 * @param paths list of paths we want information from
1519 *
1520 * @return a list of FileStatus objects
1521 *
1522 * @throws AccessControlException If access is denied
1523 * @throws IOException If an I/O error occurred
1524 *
1525 * Exceptions applicable to file systems accessed over RPC:
1526 * @throws RpcClientException If an exception occurred in the RPC client
1527 * @throws RpcServerException If an exception occurred in the RPC server
1528 * @throws UnexpectedServerException If server implementation throws
1529 * undeclared exception to RPC server
1530 */
1531 private FileStatus[] getFileStatus(Path[] paths)
1532 throws AccessControlException, IOException {
1533 if (paths == null) {
1534 return null;
1535 }
1536 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length);
1537 for (int i = 0; i < paths.length; i++) {
1538 try {
1539 results.add(FileContext.this.getFileStatus(paths[i]));
1540 } catch (FileNotFoundException fnfe) {
1541 // ignoring
1542 }
1543 }
1544 return results.toArray(new FileStatus[results.size()]);
1545 }
1546
1547
1548 /**
1549 * Return the {@link ContentSummary} of path f.
1550 * @param f path
1551 *
1552 * @return the {@link ContentSummary} of path f.
1553 *
1554 * @throws AccessControlException If access is denied
1555 * @throws FileNotFoundException If <code>f</code> does not exist
1556 * @throws UnsupportedFileSystemException If file system for
1557 * <code>f</code> is not supported
1558 * @throws IOException If an I/O error occurred
1559 *
1560 * Exceptions applicable to file systems accessed over RPC:
1561 * @throws RpcClientException If an exception occurred in the RPC client
1562 * @throws RpcServerException If an exception occurred in the RPC server
1563 * @throws UnexpectedServerException If server implementation throws
1564 * undeclared exception to RPC server
1565 */
1566 public ContentSummary getContentSummary(Path f)
1567 throws AccessControlException, FileNotFoundException,
1568 UnsupportedFileSystemException, IOException {
1569 FileStatus status = FileContext.this.getFileStatus(f);
1570 if (status.isFile()) {
1571 return new ContentSummary(status.getLen(), 1, 0);
1572 }
1573 long[] summary = {0, 0, 1};
1574 RemoteIterator<FileStatus> statusIterator =
1575 FileContext.this.listStatus(f);
1576 while(statusIterator.hasNext()) {
1577 FileStatus s = statusIterator.next();
1578 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) :
1579 new ContentSummary(s.getLen(), 1, 0);
1580 summary[0] += c.getLength();
1581 summary[1] += c.getFileCount();
1582 summary[2] += c.getDirectoryCount();
1583 }
1584 return new ContentSummary(summary[0], summary[1], summary[2]);
1585 }
1586
1587 /**
1588 * See {@link #listStatus(Path[], PathFilter)}
1589 */
1590 public FileStatus[] listStatus(Path[] files) throws AccessControlException,
1591 FileNotFoundException, IOException {
1592 return listStatus(files, DEFAULT_FILTER);
1593 }
1594
1595 /**
1596 * Filter files/directories in the given path using the user-supplied path
1597 * filter.
1598 *
1599 * @param f is the path name
1600 * @param filter is the user-supplied path filter
1601 *
1602 * @return an array of FileStatus objects for the files under the given path
1603 * after applying the filter
1604 *
1605 * @throws AccessControlException If access is denied
1606 * @throws FileNotFoundException If <code>f</code> does not exist
1607 * @throws UnsupportedFileSystemException If file system for
1608 * <code>pathPattern</code> is not supported
1609 * @throws IOException If an I/O error occurred
1610 *
1611 * Exceptions applicable to file systems accessed over RPC:
1612 * @throws RpcClientException If an exception occurred in the RPC client
1613 * @throws RpcServerException If an exception occurred in the RPC server
1614 * @throws UnexpectedServerException If server implementation throws
1615 * undeclared exception to RPC server
1616 */
1617 public FileStatus[] listStatus(Path f, PathFilter filter)
1618 throws AccessControlException, FileNotFoundException,
1619 UnsupportedFileSystemException, IOException {
1620 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1621 listStatus(results, f, filter);
1622 return results.toArray(new FileStatus[results.size()]);
1623 }
1624
1625 /**
1626 * Filter files/directories in the given list of paths using user-supplied
1627 * path filter.
1628 *
1629 * @param files is a list of paths
1630 * @param filter is the filter
1631 *
1632 * @return a list of statuses for the files under the given paths after
1633 * applying the filter
1634 *
1635 * @throws AccessControlException If access is denied
1636 * @throws FileNotFoundException If a file in <code>files</code> does not
1637 * exist
1638 * @throws IOException If an I/O error occurred
1639 *
1640 * Exceptions applicable to file systems accessed over RPC:
1641 * @throws RpcClientException If an exception occurred in the RPC client
1642 * @throws RpcServerException If an exception occurred in the RPC server
1643 * @throws UnexpectedServerException If server implementation throws
1644 * undeclared exception to RPC server
1645 */
1646 public FileStatus[] listStatus(Path[] files, PathFilter filter)
1647 throws AccessControlException, FileNotFoundException, IOException {
1648 ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1649 for (int i = 0; i < files.length; i++) {
1650 listStatus(results, files[i], filter);
1651 }
1652 return results.toArray(new FileStatus[results.size()]);
1653 }
1654
1655 /*
1656 * Filter files/directories in the given path using the user-supplied path
1657 * filter. Results are added to the given array <code>results</code>.
1658 */
1659 private void listStatus(ArrayList<FileStatus> results, Path f,
1660 PathFilter filter) throws AccessControlException,
1661 FileNotFoundException, IOException {
1662 FileStatus[] listing = listStatus(f);
1663 if (listing != null) {
1664 for (int i = 0; i < listing.length; i++) {
1665 if (filter.accept(listing[i].getPath())) {
1666 results.add(listing[i]);
1667 }
1668 }
1669 }
1670 }
1671
1672 /**
1673 * List the statuses of the files/directories in the given path
1674 * if the path is a directory.
1675 *
1676 * @param f is the path
1677 *
1678 * @return an array that contains statuses of the files/directories
1679 * in the given path
1680 *
1681 * @throws AccessControlException If access is denied
1682 * @throws FileNotFoundException If <code>f</code> does not exist
1683 * @throws UnsupportedFileSystemException If file system for <code>f</code> is
1684 * not supported
1685 * @throws IOException If an I/O error occurred
1686 *
1687 * Exceptions applicable to file systems accessed over RPC:
1688 * @throws RpcClientException If an exception occurred in the RPC client
1689 * @throws RpcServerException If an exception occurred in the RPC server
1690 * @throws UnexpectedServerException If server implementation throws
1691 * undeclared exception to RPC server
1692 */
1693 public FileStatus[] listStatus(final Path f) throws AccessControlException,
1694 FileNotFoundException, UnsupportedFileSystemException,
1695 IOException {
1696 final Path absF = fixRelativePart(f);
1697 return new FSLinkResolver<FileStatus[]>() {
1698 public FileStatus[] next(final AbstractFileSystem fs, final Path p)
1699 throws IOException, UnresolvedLinkException {
1700 return fs.listStatus(p);
1701 }
1702 }.resolve(FileContext.this, absF);
1703 }
1704
1705 /**
1706 * List the statuses and block locations of the files in the given path.
1707 *
1708 * If the path is a directory,
1709 * if recursive is false, returns files in the directory;
1710 * if recursive is true, return files in the subtree rooted at the path.
1711 * The subtree is traversed in the depth-first order.
1712 * If the path is a file, return the file's status and block locations.
1713 * Files across symbolic links are also returned.
1714 *
1715 * @param f is the path
1716 * @param recursive if the subdirectories need to be traversed recursively
1717 *
1718 * @return an iterator that traverses statuses of the files
1719 * If any IO exception (for example a sub-directory gets deleted while
1720 * listing is being executed), next() or hasNext() of the returned iterator
1721 * may throw a RuntimeException with the IO exception as the cause.
1722 *
1723 * @throws AccessControlException If access is denied
1724 * @throws FileNotFoundException If <code>f</code> does not exist
1725 * @throws UnsupportedFileSystemException If file system for <code>f</code>
1726 * is not supported
1727 * @throws IOException If an I/O error occurred
1728 *
1729 * Exceptions applicable to file systems accessed over RPC:
1730 * @throws RpcClientException If an exception occurred in the RPC client
1731 * @throws RpcServerException If an exception occurred in the RPC server
1732 * @throws UnexpectedServerException If server implementation throws
1733 * undeclared exception to RPC server
1734 */
1735 public RemoteIterator<LocatedFileStatus> listFiles(
1736 final Path f, final boolean recursive) throws AccessControlException,
1737 FileNotFoundException, UnsupportedFileSystemException,
1738 IOException {
1739 return new RemoteIterator<LocatedFileStatus>() {
1740 private Stack<RemoteIterator<LocatedFileStatus>> itors =
1741 new Stack<RemoteIterator<LocatedFileStatus>>();
1742 RemoteIterator<LocatedFileStatus> curItor = listLocatedStatus(f);
1743 LocatedFileStatus curFile;
1744
1745 /**
1746 * Returns <tt>true</tt> if the iterator has more files.
1747 *
1748 * @return <tt>true</tt> if the iterator has more files.
1749 * @throws AccessControlException if not allowed to access next
1750 * file's status or locations
1751 * @throws FileNotFoundException if next file does not exist any more
1752 * @throws UnsupportedFileSystemException if next file's
1753 * fs is unsupported
1754 * @throws IOException for all other IO errors
1755 * for example, NameNode is not avaialbe or
1756 * NameNode throws IOException due to an error
1757 * while getting the status or block locations
1758 */
1759 @Override
1760 public boolean hasNext() throws IOException {
1761 while (curFile == null) {
1762 if (curItor.hasNext()) {
1763 handleFileStat(curItor.next());
1764 } else if (!itors.empty()) {
1765 curItor = itors.pop();
1766 } else {
1767 return false;
1768 }
1769 }
1770 return true;
1771 }
1772
1773 /**
1774 * Process the input stat.
1775 * If it is a file, return the file stat.
1776 * If it is a directory, traverse the directory if recursive is true;
1777 * ignore it if recursive is false.
1778 * If it is a symlink, resolve the symlink first and then process it
1779 * depending on if it is a file or directory.
1780 * @param stat input status
1781 * @throws AccessControlException if access is denied
1782 * @throws FileNotFoundException if file is not found
1783 * @throws UnsupportedFileSystemException if fs is not supported
1784 * @throws IOException for all other IO errors
1785 */
1786 private void handleFileStat(LocatedFileStatus stat)
1787 throws IOException {
1788 if (stat.isFile()) { // file
1789 curFile = stat;
1790 } else if (stat.isSymlink()) { // symbolic link
1791 // resolve symbolic link
1792 FileStatus symstat = FileContext.this.getFileStatus(
1793 stat.getSymlink());
1794 if (symstat.isFile() || (recursive && symstat.isDirectory())) {
1795 itors.push(curItor);
1796 curItor = listLocatedStatus(stat.getPath());
1797 }
1798 } else if (recursive) { // directory
1799 itors.push(curItor);
1800 curItor = listLocatedStatus(stat.getPath());
1801 }
1802 }
1803
1804 /**
1805 * Returns the next file's status with its block locations
1806 *
1807 * @throws AccessControlException if not allowed to access next
1808 * file's status or locations
1809 * @throws FileNotFoundException if next file does not exist any more
1810 * @throws UnsupportedFileSystemException if next file's
1811 * fs is unsupported
1812 * @throws IOException for all other IO errors
1813 * for example, NameNode is not avaialbe or
1814 * NameNode throws IOException due to an error
1815 * while getting the status or block locations
1816 */
1817 @Override
1818 public LocatedFileStatus next() throws IOException {
1819 if (hasNext()) {
1820 LocatedFileStatus result = curFile;
1821 curFile = null;
1822 return result;
1823 }
1824 throw new java.util.NoSuchElementException("No more entry in " + f);
1825 }
1826 };
1827 }
1828
1829 /**
1830 * <p>Return all the files that match filePattern and are not checksum
1831 * files. Results are sorted by their names.
1832 *
1833 * <p>
1834 * A filename pattern is composed of <i>regular</i> characters and
1835 * <i>special pattern matching</i> characters, which are:
1836 *
1837 * <dl>
1838 * <dd>
1839 * <dl>
1840 * <p>
1841 * <dt> <tt> ? </tt>
1842 * <dd> Matches any single character.
1843 *
1844 * <p>
1845 * <dt> <tt> * </tt>
1846 * <dd> Matches zero or more characters.
1847 *
1848 * <p>
1849 * <dt> <tt> [<i>abc</i>] </tt>
1850 * <dd> Matches a single character from character set
1851 * <tt>{<i>a,b,c</i>}</tt>.
1852 *
1853 * <p>
1854 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt>
1855 * <dd> Matches a single character from the character range
1856 * <tt>{<i>a...b</i>}</tt>. Note: character <tt><i>a</i></tt> must be
1857 * lexicographically less than or equal to character <tt><i>b</i></tt>.
1858 *
1859 * <p>
1860 * <dt> <tt> [^<i>a</i>] </tt>
1861 * <dd> Matches a single char that is not from character set or range
1862 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur
1863 * immediately to the right of the opening bracket.
1864 *
1865 * <p>
1866 * <dt> <tt> \<i>c</i> </tt>
1867 * <dd> Removes (escapes) any special meaning of character <i>c</i>.
1868 *
1869 * <p>
1870 * <dt> <tt> {ab,cd} </tt>
1871 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt>
1872 *
1873 * <p>
1874 * <dt> <tt> {ab,c{de,fh}} </tt>
1875 * <dd> Matches a string from string set <tt>{<i>ab, cde, cfh</i>}</tt>
1876 *
1877 * </dl>
1878 * </dd>
1879 * </dl>
1880 *
1881 * @param pathPattern a regular expression specifying a pth pattern
1882 *
1883 * @return an array of paths that match the path pattern
1884 *
1885 * @throws AccessControlException If access is denied
1886 * @throws UnsupportedFileSystemException If file system for
1887 * <code>pathPattern</code> is not supported
1888 * @throws IOException If an I/O error occurred
1889 *
1890 * Exceptions applicable to file systems accessed over RPC:
1891 * @throws RpcClientException If an exception occurred in the RPC client
1892 * @throws RpcServerException If an exception occurred in the RPC server
1893 * @throws UnexpectedServerException If server implementation throws
1894 * undeclared exception to RPC server
1895 */
1896 public FileStatus[] globStatus(Path pathPattern)
1897 throws AccessControlException, UnsupportedFileSystemException,
1898 IOException {
1899 return globStatus(pathPattern, DEFAULT_FILTER);
1900 }
1901
1902 /**
1903 * Return an array of FileStatus objects whose path names match pathPattern
1904 * and is accepted by the user-supplied path filter. Results are sorted by
1905 * their path names.
1906 * Return null if pathPattern has no glob and the path does not exist.
1907 * Return an empty array if pathPattern has a glob and no path matches it.
1908 *
1909 * @param pathPattern regular expression specifying the path pattern
1910 * @param filter user-supplied path filter
1911 *
1912 * @return an array of FileStatus objects
1913 *
1914 * @throws AccessControlException If access is denied
1915 * @throws UnsupportedFileSystemException If file system for
1916 * <code>pathPattern</code> is not supported
1917 * @throws IOException If an I/O error occurred
1918 *
1919 * Exceptions applicable to file systems accessed over RPC:
1920 * @throws RpcClientException If an exception occurred in the RPC client
1921 * @throws RpcServerException If an exception occurred in the RPC server
1922 * @throws UnexpectedServerException If server implementation throws
1923 * undeclared exception to RPC server
1924 */
1925 public FileStatus[] globStatus(final Path pathPattern,
1926 final PathFilter filter) throws AccessControlException,
1927 UnsupportedFileSystemException, IOException {
1928 URI uri = getFSofPath(fixRelativePart(pathPattern)).getUri();
1929
1930 String filename = pathPattern.toUri().getPath();
1931
1932 List<String> filePatterns = GlobExpander.expand(filename);
1933 if (filePatterns.size() == 1) {
1934 Path absPathPattern = fixRelativePart(pathPattern);
1935 return globStatusInternal(uri, new Path(absPathPattern.toUri()
1936 .getPath()), filter);
1937 } else {
1938 List<FileStatus> results = new ArrayList<FileStatus>();
1939 for (String iFilePattern : filePatterns) {
1940 Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern));
1941 FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter);
1942 for (FileStatus file : files) {
1943 results.add(file);
1944 }
1945 }
1946 return results.toArray(new FileStatus[results.size()]);
1947 }
1948 }
1949
1950 /**
1951 *
1952 * @param uri for all the inPathPattern
1953 * @param inPathPattern - without the scheme & authority (take from uri)
1954 * @param filter
1955 *
1956 * @return an array of FileStatus objects
1957 *
1958 * @throws AccessControlException If access is denied
1959 * @throws IOException If an I/O error occurred
1960 */
1961 private FileStatus[] globStatusInternal(final URI uri,
1962 final Path inPathPattern, final PathFilter filter)
1963 throws AccessControlException, IOException
1964 {
1965 Path[] parents = new Path[1];
1966 int level = 0;
1967
1968 assert(inPathPattern.toUri().getScheme() == null &&
1969 inPathPattern.toUri().getAuthority() == null &&
1970 inPathPattern.isUriPathAbsolute());
1971
1972
1973 String filename = inPathPattern.toUri().getPath();
1974
1975 // path has only zero component
1976 if ("".equals(filename) || Path.SEPARATOR.equals(filename)) {
1977 Path p = inPathPattern.makeQualified(uri, null);
1978 return getFileStatus(new Path[]{p});
1979 }
1980
1981 // path has at least one component
1982 String[] components = filename.split(Path.SEPARATOR);
1983
1984 // Path is absolute, first component is "/" hence first component
1985 // is the uri root
1986 parents[0] = new Path(new Path(uri), new Path("/"));
1987 level = 1;
1988
1989 // glob the paths that match the parent path, ie. [0, components.length-1]
1990 boolean[] hasGlob = new boolean[]{false};
1991 Path[] relParentPaths =
1992 globPathsLevel(parents, components, level, hasGlob);
1993 FileStatus[] results;
1994
1995 if (relParentPaths == null || relParentPaths.length == 0) {
1996 results = null;
1997 } else {
1998 // fix the pathes to be abs
1999 Path[] parentPaths = new Path [relParentPaths.length];
2000 for(int i=0; i<relParentPaths.length; i++) {
2001 parentPaths[i] = relParentPaths[i].makeQualified(uri, null);
2002 }
2003
2004 // Now work on the last component of the path
2005 GlobFilter fp =
2006 new GlobFilter(components[components.length - 1], filter);
2007 if (fp.hasPattern()) { // last component has a pattern
2008 // list parent directories and then glob the results
2009 results = listStatus(parentPaths, fp);
2010 hasGlob[0] = true;
2011 } else { // last component does not have a pattern
2012 // get all the path names
2013 ArrayList<Path> filteredPaths =
2014 new ArrayList<Path>(parentPaths.length);
2015 for (int i = 0; i < parentPaths.length; i++) {
2016 parentPaths[i] = new Path(parentPaths[i],
2017 components[components.length - 1]);
2018 if (fp.accept(parentPaths[i])) {
2019 filteredPaths.add(parentPaths[i]);
2020 }
2021 }
2022 // get all their statuses
2023 results = getFileStatus(
2024 filteredPaths.toArray(new Path[filteredPaths.size()]));
2025 }
2026 }
2027
2028 // Decide if the pathPattern contains a glob or not
2029 if (results == null) {
2030 if (hasGlob[0]) {
2031 results = new FileStatus[0];
2032 }
2033 } else {
2034 if (results.length == 0) {
2035 if (!hasGlob[0]) {
2036 results = null;
2037 }
2038 } else {
2039 Arrays.sort(results);
2040 }
2041 }
2042 return results;
2043 }
2044
2045 /*
2046 * For a path of N components, return a list of paths that match the
2047 * components [<code>level</code>, <code>N-1</code>].
2048 */
2049 private Path[] globPathsLevel(Path[] parents, String[] filePattern,
2050 int level, boolean[] hasGlob) throws AccessControlException,
2051 FileNotFoundException, IOException {
2052 if (level == filePattern.length - 1) {
2053 return parents;
2054 }
2055 if (parents == null || parents.length == 0) {
2056 return null;
2057 }
2058 GlobFilter fp = new GlobFilter(filePattern[level]);
2059 if (fp.hasPattern()) {
2060 parents = FileUtil.stat2Paths(listStatus(parents, fp));
2061 hasGlob[0] = true;
2062 } else {
2063 for (int i = 0; i < parents.length; i++) {
2064 parents[i] = new Path(parents[i], filePattern[level]);
2065 }
2066 }
2067 return globPathsLevel(parents, filePattern, level + 1, hasGlob);
2068 }
2069
2070 /**
2071 * Copy file from src to dest. See
2072 * {@link #copy(Path, Path, boolean, boolean)}
2073 */
2074 public boolean copy(final Path src, final Path dst)
2075 throws AccessControlException, FileAlreadyExistsException,
2076 FileNotFoundException, ParentNotDirectoryException,
2077 UnsupportedFileSystemException, IOException {
2078 return copy(src, dst, false, false);
2079 }
2080
2081 /**
2082 * Copy from src to dst, optionally deleting src and overwriting dst.
2083 * @param src
2084 * @param dst
2085 * @param deleteSource - delete src if true
2086 * @param overwrite overwrite dst if true; throw IOException if dst exists
2087 * and overwrite is false.
2088 *
2089 * @return true if copy is successful
2090 *
2091 * @throws AccessControlException If access is denied
2092 * @throws FileAlreadyExistsException If <code>dst</code> already exists
2093 * @throws FileNotFoundException If <code>src</code> does not exist
2094 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not
2095 * a directory
2096 * @throws UnsupportedFileSystemException If file system for
2097 * <code>src</code> or <code>dst</code> is not supported
2098 * @throws IOException If an I/O error occurred
2099 *
2100 * Exceptions applicable to file systems accessed over RPC:
2101 * @throws RpcClientException If an exception occurred in the RPC client
2102 * @throws RpcServerException If an exception occurred in the RPC server
2103 * @throws UnexpectedServerException If server implementation throws
2104 * undeclared exception to RPC server
2105 *
2106 * RuntimeExceptions:
2107 * @throws InvalidPathException If path <code>dst</code> is invalid
2108 */
2109 public boolean copy(final Path src, final Path dst, boolean deleteSource,
2110 boolean overwrite) throws AccessControlException,
2111 FileAlreadyExistsException, FileNotFoundException,
2112 ParentNotDirectoryException, UnsupportedFileSystemException,
2113 IOException {
2114 checkNotSchemeWithRelative(src);
2115 checkNotSchemeWithRelative(dst);
2116 Path qSrc = makeQualified(src);
2117 Path qDst = makeQualified(dst);
2118 checkDest(qSrc.getName(), qDst, overwrite);
2119 FileStatus fs = FileContext.this.getFileStatus(qSrc);
2120 if (fs.isDirectory()) {
2121 checkDependencies(qSrc, qDst);
2122 mkdir(qDst, FsPermission.getDefault(), true);
2123 FileStatus[] contents = listStatus(qSrc);
2124 for (FileStatus content : contents) {
2125 copy(makeQualified(content.getPath()), makeQualified(new Path(qDst,
2126 content.getPath().getName())), deleteSource, overwrite);
2127 }
2128 } else {
2129 InputStream in=null;
2130 OutputStream out = null;
2131 try {
2132 in = open(qSrc);
2133 EnumSet<CreateFlag> createFlag = overwrite ? EnumSet.of(
2134 CreateFlag.CREATE, CreateFlag.OVERWRITE) :
2135 EnumSet.of(CreateFlag.CREATE);
2136 out = create(qDst, createFlag);
2137 IOUtils.copyBytes(in, out, conf, true);
2138 } catch (IOException e) {
2139 IOUtils.closeStream(out);
2140 IOUtils.closeStream(in);
2141 throw e;
2142 }
2143 }
2144 if (deleteSource) {
2145 return delete(qSrc, true);
2146 } else {
2147 return true;
2148 }
2149 }
2150 }
2151
2152 /**
2153 * Check if copying srcName to dst would overwrite an existing
2154 * file or directory.
2155 * @param srcName File or directory to be copied.
2156 * @param dst Destination to copy srcName to.
2157 * @param overwrite Whether it's ok to overwrite an existing file.
2158 * @throws AccessControlException If access is denied.
2159 * @throws IOException If dst is an existing directory, or dst is an
2160 * existing file and the overwrite option is not passed.
2161 */
2162 private void checkDest(String srcName, Path dst, boolean overwrite)
2163 throws AccessControlException, IOException {
2164 try {
2165 FileStatus dstFs = getFileStatus(dst);
2166 if (dstFs.isDirectory()) {
2167 if (null == srcName) {
2168 throw new IOException("Target " + dst + " is a directory");
2169 }
2170 // Recurse to check if dst/srcName exists.
2171 checkDest(null, new Path(dst, srcName), overwrite);
2172 } else if (!overwrite) {
2173 throw new IOException("Target " + new Path(dst, srcName)
2174 + " already exists");
2175 }
2176 } catch (FileNotFoundException e) {
2177 // dst does not exist - OK to copy.
2178 }
2179 }
2180
2181 //
2182 // If the destination is a subdirectory of the source, then
2183 // generate exception
2184 //
2185 private static void checkDependencies(Path qualSrc, Path qualDst)
2186 throws IOException {
2187 if (isSameFS(qualSrc, qualDst)) {
2188 String srcq = qualSrc.toString() + Path.SEPARATOR;
2189 String dstq = qualDst.toString() + Path.SEPARATOR;
2190 if (dstq.startsWith(srcq)) {
2191 if (srcq.length() == dstq.length()) {
2192 throw new IOException("Cannot copy " + qualSrc + " to itself.");
2193 } else {
2194 throw new IOException("Cannot copy " + qualSrc +
2195 " to its subdirectory " + qualDst);
2196 }
2197 }
2198 }
2199 }
2200
2201 /**
2202 * Are qualSrc and qualDst of the same file system?
2203 * @param qualPath1 - fully qualified path
2204 * @param qualPath2 - fully qualified path
2205 * @return
2206 */
2207 private static boolean isSameFS(Path qualPath1, Path qualPath2) {
2208 URI srcUri = qualPath1.toUri();
2209 URI dstUri = qualPath2.toUri();
2210 return (srcUri.getScheme().equals(dstUri.getScheme()) &&
2211 !(srcUri.getAuthority() != null && dstUri.getAuthority() != null && srcUri
2212 .getAuthority().equals(dstUri.getAuthority())));
2213 }
2214
2215 /**
2216 * Deletes all the paths in deleteOnExit on JVM shutdown.
2217 */
2218 static class FileContextFinalizer extends Thread {
2219 public synchronized void run() {
2220 processDeleteOnExit();
2221 }
2222 }
2223
2224 /**
2225 * Resolves all symbolic links in the specified path.
2226 * Returns the new path object.
2227 */
2228 protected Path resolve(final Path f) throws FileNotFoundException,
2229 UnresolvedLinkException, AccessControlException, IOException {
2230 return new FSLinkResolver<Path>() {
2231 public Path next(final AbstractFileSystem fs, final Path p)
2232 throws IOException, UnresolvedLinkException {
2233 return fs.resolvePath(p);
2234 }
2235 }.resolve(this, f);
2236 }
2237
2238 /**
2239 * Resolves all symbolic links in the specified path leading up
2240 * to, but not including the final path component.
2241 * @param f path to resolve
2242 * @return the new path object.
2243 */
2244 protected Path resolveIntermediate(final Path f) throws IOException {
2245 return new FSLinkResolver<FileStatus>() {
2246 public FileStatus next(final AbstractFileSystem fs, final Path p)
2247 throws IOException, UnresolvedLinkException {
2248 return fs.getFileLinkStatus(p);
2249 }
2250 }.resolve(this, f).getPath();
2251 }
2252
2253 /**
2254 * Returns the list of AbstractFileSystems accessed in the path. The list may
2255 * contain more than one AbstractFileSystems objects in case of symlinks.
2256 *
2257 * @param f
2258 * Path which needs to be resolved
2259 * @return List of AbstractFileSystems accessed in the path
2260 * @throws IOException
2261 */
2262 Set<AbstractFileSystem> resolveAbstractFileSystems(final Path f)
2263 throws IOException {
2264 final Path absF = fixRelativePart(f);
2265 final HashSet<AbstractFileSystem> result
2266 = new HashSet<AbstractFileSystem>();
2267 new FSLinkResolver<Void>() {
2268 public Void next(final AbstractFileSystem fs, final Path p)
2269 throws IOException, UnresolvedLinkException {
2270 result.add(fs);
2271 fs.getFileStatus(p);
2272 return null;
2273 }
2274 }.resolve(this, absF);
2275 return result;
2276 }
2277
2278 /**
2279 * Class used to perform an operation on and resolve symlinks in a
2280 * path. The operation may potentially span multiple file systems.
2281 */
2282 protected abstract class FSLinkResolver<T> {
2283 // The maximum number of symbolic link components in a path
2284 private static final int MAX_PATH_LINKS = 32;
2285
2286 /**
2287 * Generic helper function overridden on instantiation to perform a
2288 * specific operation on the given file system using the given path
2289 * which may result in an UnresolvedLinkException.
2290 * @param fs AbstractFileSystem to perform the operation on.
2291 * @param p Path given the file system.
2292 * @return Generic type determined by the specific implementation.
2293 * @throws UnresolvedLinkException If symbolic link <code>path</code> could
2294 * not be resolved
2295 * @throws IOException an I/O error occured
2296 */
2297 public abstract T next(final AbstractFileSystem fs, final Path p)
2298 throws IOException, UnresolvedLinkException;
2299
2300 /**
2301 * Performs the operation specified by the next function, calling it
2302 * repeatedly until all symlinks in the given path are resolved.
2303 * @param fc FileContext used to access file systems.
2304 * @param p The path to resolve symlinks in.
2305 * @return Generic type determined by the implementation of next.
2306 * @throws IOException
2307 */
2308 public T resolve(final FileContext fc, Path p) throws IOException {
2309 int count = 0;
2310 T in = null;
2311 Path first = p;
2312 // NB: More than one AbstractFileSystem can match a scheme, eg
2313 // "file" resolves to LocalFs but could have come by RawLocalFs.
2314 AbstractFileSystem fs = fc.getFSofPath(p);
2315
2316 // Loop until all symlinks are resolved or the limit is reached
2317 for (boolean isLink = true; isLink;) {
2318 try {
2319 in = next(fs, p);
2320 isLink = false;
2321 } catch (UnresolvedLinkException e) {
2322 if (count++ > MAX_PATH_LINKS) {
2323 throw new IOException("Possible cyclic loop while " +
2324 "following symbolic link " + first);
2325 }
2326 // Resolve the first unresolved path component
2327 p = qualifySymlinkTarget(fs, p, fs.getLinkTarget(p));
2328 fs = fc.getFSofPath(p);
2329 }
2330 }
2331 return in;
2332 }
2333 }
2334
2335 /**
2336 * Get the statistics for a particular file system
2337 *
2338 * @param uri
2339 * the uri to lookup the statistics. Only scheme and authority part
2340 * of the uri are used as the key to store and lookup.
2341 * @return a statistics object
2342 */
2343 public static Statistics getStatistics(URI uri) {
2344 return AbstractFileSystem.getStatistics(uri);
2345 }
2346
2347 /**
2348 * Clears all the statistics stored in AbstractFileSystem, for all the file
2349 * systems.
2350 */
2351 public static void clearStatistics() {
2352 AbstractFileSystem.clearStatistics();
2353 }
2354
2355 /**
2356 * Prints the statistics to standard output. File System is identified by the
2357 * scheme and authority.
2358 */
2359 public static void printStatistics() {
2360 AbstractFileSystem.printStatistics();
2361 }
2362
2363 /**
2364 * @return Map of uri and statistics for each filesystem instantiated. The uri
2365 * consists of scheme and authority for the filesystem.
2366 */
2367 public static Map<URI, Statistics> getAllStatistics() {
2368 return AbstractFileSystem.getAllStatistics();
2369 }
2370
2371 /**
2372 * Get delegation tokens for the file systems accessed for a given
2373 * path.
2374 * @param p Path for which delegations tokens are requested.
2375 * @param renewer the account name that is allowed to renew the token.
2376 * @return List of delegation tokens.
2377 * @throws IOException
2378 */
2379 @InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" })
2380 public List<Token<?>> getDelegationTokens(
2381 Path p, String renewer) throws IOException {
2382 Set<AbstractFileSystem> afsSet = resolveAbstractFileSystems(p);
2383 List<Token<?>> tokenList =
2384 new ArrayList<Token<?>>();
2385 for (AbstractFileSystem afs : afsSet) {
2386 List<Token<?>> afsTokens = afs.getDelegationTokens(renewer);
2387 tokenList.addAll(afsTokens);
2388 }
2389 return tokenList;
2390 }
2391 }