Author: szetszwo Date: Wed Mar 13 11:00:11 2013 New Revision: 1455883 URL: http://svn.apache.org/r1455883 Log: HDFS-4597. Backport WebHDFS concat.
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/resources/ConcatSourcesParam.java Modified: hadoop/common/branches/branch-1/CHANGES.txt hadoop/common/branches/branch-1/src/core/org/apache/hadoop/fs/FileSystem.java hadoop/common/branches/branch-1/src/core/org/apache/hadoop/fs/FilterFileSystem.java hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/webhdfs.xml hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/resources/PostOpParam.java hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java Modified: hadoop/common/branches/branch-1/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/CHANGES.txt?rev=1455883&r1=1455882&r2=1455883&view=diff ============================================================================== --- hadoop/common/branches/branch-1/CHANGES.txt (original) +++ hadoop/common/branches/branch-1/CHANGES.txt Wed Mar 13 11:00:11 2013 @@ -74,6 +74,8 @@ Release 1.2.0 - unreleased HDFS-4256 Backport concatenation of files into a single file to branch-1 (sanjay Radia) + HDFS-4597. Backport WebHDFS concat. (szetszwo) + IMPROVEMENTS HDFS-3515. Port HDFS-1457 to branch-1. 
(eli) Modified: hadoop/common/branches/branch-1/src/core/org/apache/hadoop/fs/FileSystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/core/org/apache/hadoop/fs/FileSystem.java?rev=1455883&r1=1455882&r2=1455883&view=diff ============================================================================== --- hadoop/common/branches/branch-1/src/core/org/apache/hadoop/fs/FileSystem.java (original) +++ hadoop/common/branches/branch-1/src/core/org/apache/hadoop/fs/FileSystem.java Wed Mar 13 11:00:11 2013 @@ -669,6 +669,17 @@ public abstract class FileSystem extends */ public abstract FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException; + + /** + * Concat existing files together. + * @param trg the path to the target destination. + * @param srcs the paths to the sources to use for the concatenation. + * @throws IOException + */ + public void concat(final Path trg, final Path [] srcs) throws IOException { + throw new UnsupportedOperationException("Not implemented by the " + + getClass().getSimpleName() + " FileSystem implementation"); + } /** * Get replication. 
Modified: hadoop/common/branches/branch-1/src/core/org/apache/hadoop/fs/FilterFileSystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/core/org/apache/hadoop/fs/FilterFileSystem.java?rev=1455883&r1=1455882&r2=1455883&view=diff ============================================================================== --- hadoop/common/branches/branch-1/src/core/org/apache/hadoop/fs/FilterFileSystem.java (original) +++ hadoop/common/branches/branch-1/src/core/org/apache/hadoop/fs/FilterFileSystem.java Wed Mar 13 11:00:11 2013 @@ -114,7 +114,11 @@ public class FilterFileSystem extends Fi return fs.append(f, bufferSize, progress); } - /** {@inheritDoc} */ + @Override + public void concat(Path f, Path[] psrcs) throws IOException { + fs.concat(f, psrcs); + } + @Override public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Modified: hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml?rev=1455883&r1=1455882&r2=1455883&view=diff ============================================================================== --- hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml (original) +++ hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml Wed Mar 13 11:00:11 2013 @@ -166,6 +166,7 @@ See http://forrest.apache.org/docs/linki <setTimes href="#setTimes(org.apache.hadoop.fs.Path,%20long,%20long)" /> <append href="#append(org.apache.hadoop.fs.Path,%20int,%20org.apache.hadoop.util.Progressable)" /> + <concat href="#concat(org.apache.hadoop.fs.Path,%20org.apache.hadoop.fs.Path[])" /> <delete href="#delete(org.apache.hadoop.fs.Path,%20boolean)" /> </filesystem> </fs> Modified: hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/webhdfs.xml URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/webhdfs.xml?rev=1455883&r1=1455882&r2=1455883&view=diff ============================================================================== --- hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/webhdfs.xml (original) +++ hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/webhdfs.xml Wed Mar 13 11:00:11 2013 @@ -104,6 +104,9 @@ <li><a href="#APPEND"><code>APPEND</code></a> (see <a href="ext:api/org/apache/hadoop/fs/filesystem/append">FileSystem.append</a>) </li> + <li><a href="#CONCAT"><code>CONCAT</code></a> + (see <a href="ext:api/org/apache/hadoop/fs/filesystem/concat">FileSystem.concat</a>) + </li> </ul></li> <li>HTTP DELETE <ul> @@ -311,6 +314,28 @@ Content-Length: 0 </p> </section> <!-- ***************************************************************************** --> + <section id="CONCAT"> + <title>Concatenate Files</title> +<ul> + <li>Submit a HTTP POST request. 
 + <source> + curl -i -X POST "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=CONCAT&sources=<PATHS>" + </source> +The client receives a response with zero content length: + <source> +HTTP/1.1 200 OK +Content-Length: 0 + </source> + </li> +</ul> +<p> + See also: + <a href="#sources"><code>sources</code></a>, + <a href="ext:api/org/apache/hadoop/fs/filesystem/concat">FileSystem.concat</a> +</p> + </section> + +<!-- ***************************************************************************** --> <section id="OPEN"> <title>Open and Read a File</title> <ul> @@ -1535,6 +1560,22 @@ var fileStatusProperties = </p> </section> <!-- ***************************************************************************** --> + <section id="sources"> + <title>Sources</title> +<table> + <tr><td>Name</td><td><code>sources</code></td></tr> + <tr><td>Description</td><td>A list of source paths.</td></tr> + <tr><td>Type</td><td>String</td></tr> + <tr><td>Default Value</td><td><empty></td></tr> + <tr><td>Valid Values</td><td>A list of comma separated absolute FileSystem paths without scheme and authority.</td></tr> + <tr><td>Syntax</td><td>Any string.</td></tr> +</table> +<p> + See also: + <a href="#CONCAT"><code>CONCAT</code></a> +</p> + </section> +<!-- ***************************************************************************** --> <section id="token"> <title>Token</title> <table> Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java?rev=1455883&r1=1455882&r2=1455883&view=diff ============================================================================== --- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java (original) +++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DistributedFileSystem.java Wed Mar 13 11:00:11 2013 @@ -220,14 +220,14 @@ public class 
DistributedFileSystem exten } /** - * THIS IS DFS only operations, it is not part of FileSystem - * move blocks from srcs to trg - * and delete srcs afterwards - * all blocks should be the same size + * Move blocks from srcs to trg and delete srcs afterwards. + * The file block sizes must be the same. + * * @param trg existing file to append to * @param psrcs list of files (same block size, same replication) * @throws IOException */ + @Override public void concat(Path trg, Path [] psrcs) throws IOException { String [] srcs = new String [psrcs.length]; for(int i=0; i<psrcs.length; i++) { Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java?rev=1455883&r1=1455882&r2=1455883&view=diff ============================================================================== --- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java (original) +++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java Wed Mar 13 11:00:11 2013 @@ -63,6 +63,7 @@ import org.apache.hadoop.hdfs.web.WebHdf import org.apache.hadoop.hdfs.web.resources.AccessTimeParam; import org.apache.hadoop.hdfs.web.resources.BlockSizeParam; import org.apache.hadoop.hdfs.web.resources.BufferSizeParam; +import org.apache.hadoop.hdfs.web.resources.ConcatSourcesParam; import org.apache.hadoop.hdfs.web.resources.DelegationParam; import org.apache.hadoop.hdfs.web.resources.DeleteOpParam; import org.apache.hadoop.hdfs.web.resources.DestinationParam; @@ -401,10 +402,12 @@ public class NamenodeWebHdfsMethods { final DoAsParam doAsUser, @QueryParam(PostOpParam.NAME) @DefaultValue(PostOpParam.DEFAULT) final PostOpParam op, + @QueryParam(ConcatSourcesParam.NAME) 
@DefaultValue(ConcatSourcesParam.DEFAULT) + final ConcatSourcesParam concatSrcs, @QueryParam(BufferSizeParam.NAME) @DefaultValue(BufferSizeParam.DEFAULT) final BufferSizeParam bufferSize ) throws IOException, InterruptedException { - return post(ugi, delegation, username, doAsUser, ROOT, op, bufferSize); + return post(ugi, delegation, username, doAsUser, ROOT, op, concatSrcs, bufferSize); } /** Handle HTTP POST request. */ @@ -423,11 +426,13 @@ public class NamenodeWebHdfsMethods { @PathParam(UriFsPathParam.NAME) final UriFsPathParam path, @QueryParam(PostOpParam.NAME) @DefaultValue(PostOpParam.DEFAULT) final PostOpParam op, + @QueryParam(ConcatSourcesParam.NAME) @DefaultValue(ConcatSourcesParam.DEFAULT) + final ConcatSourcesParam concatSrcs, @QueryParam(BufferSizeParam.NAME) @DefaultValue(BufferSizeParam.DEFAULT) final BufferSizeParam bufferSize ) throws IOException, InterruptedException { - init(ugi, delegation, username, doAsUser, path, op, bufferSize); + init(ugi, delegation, username, doAsUser, path, op, concatSrcs, bufferSize); return ugi.doAs(new PrivilegedExceptionAction<Response>() { @Override @@ -445,6 +450,11 @@ public class NamenodeWebHdfsMethods { fullpath, op.getValue(), -1L, -1L, bufferSize); return Response.temporaryRedirect(uri).type(MediaType.APPLICATION_OCTET_STREAM).build(); } + case CONCAT: + { + namenode.concat(fullpath, concatSrcs.getAbsolutePaths()); + return Response.ok().build(); + } default: throw new UnsupportedOperationException(op + " is not supported"); } Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java?rev=1455883&r1=1455882&r2=1455883&view=diff ============================================================================== --- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java (original) +++ 
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java Wed Mar 13 11:00:11 2013 @@ -61,6 +61,7 @@ import org.apache.hadoop.hdfs.server.nam import org.apache.hadoop.hdfs.web.resources.AccessTimeParam; import org.apache.hadoop.hdfs.web.resources.BlockSizeParam; import org.apache.hadoop.hdfs.web.resources.BufferSizeParam; +import org.apache.hadoop.hdfs.web.resources.ConcatSourcesParam; import org.apache.hadoop.hdfs.web.resources.DeleteOpParam; import org.apache.hadoop.hdfs.web.resources.DestinationParam; import org.apache.hadoop.hdfs.web.resources.GetOpParam; @@ -673,6 +674,15 @@ public class WebHdfsFileSystem extends F } @Override + public void concat(final Path trg, final Path [] srcs) throws IOException { + statistics.incrementWriteOps(1); + final HttpOpParam.Op op = PostOpParam.Op.CONCAT; + + ConcatSourcesParam param = new ConcatSourcesParam(srcs); + run(op, trg, param); + } + + @Override public FSDataOutputStream create(final Path f, final FsPermission permission, final boolean overwrite, final int bufferSize, final short replication, final long blockSize, final Progressable progress) throws IOException { Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/resources/ConcatSourcesParam.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/resources/ConcatSourcesParam.java?rev=1455883&view=auto ============================================================================== --- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/resources/ConcatSourcesParam.java (added) +++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/resources/ConcatSourcesParam.java Wed Mar 13 11:00:11 2013 @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.web.resources; + +import org.apache.hadoop.fs.Path; + +/** The concat source paths parameter. */ +public class ConcatSourcesParam extends StringParam { + /** Parameter name. */ + public static final String NAME = "sources"; + + public static final String DEFAULT = ""; + + private static final Domain DOMAIN = new Domain(NAME, null); + + private static String paths2String(Path[] paths) { + if (paths == null || paths.length == 0) { + return ""; + } + final StringBuilder b = new StringBuilder(paths[0].toUri().getPath()); + for(int i = 1; i < paths.length; i++) { + b.append(',').append(paths[i].toUri().getPath()); + } + return b.toString(); + } + + /** + * Constructor. + * @param str a string representation of the parameter value. + */ + public ConcatSourcesParam(String str) { + super(DOMAIN, str); + } + + public ConcatSourcesParam(Path[] paths) { + this(paths2String(paths)); + } + + @Override + public String getName() { + return NAME; + } + + /** @return the absolute path. 
*/ + public final String[] getAbsolutePaths() { + final String[] paths = getValue().split(","); + return paths; + } +} Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/resources/PostOpParam.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/resources/PostOpParam.java?rev=1455883&r1=1455882&r2=1455883&view=diff ============================================================================== --- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/resources/PostOpParam.java (original) +++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/web/resources/PostOpParam.java Wed Mar 13 11:00:11 2013 @@ -23,13 +23,17 @@ import java.net.HttpURLConnection; public class PostOpParam extends HttpOpParam<PostOpParam.Op> { /** Post operations. */ public static enum Op implements HttpOpParam.Op { - APPEND(HttpURLConnection.HTTP_OK), + APPEND(true, HttpURLConnection.HTTP_OK), - NULL(HttpURLConnection.HTTP_NOT_IMPLEMENTED); + CONCAT(false, HttpURLConnection.HTTP_OK), + NULL(false, HttpURLConnection.HTTP_NOT_IMPLEMENTED); + + final boolean doOutputAndRedirect; final int expectedHttpResponseCode; - Op(final int expectedHttpResponseCode) { + Op(final boolean doOutputAndRedirect, final int expectedHttpResponseCode) { + this.doOutputAndRedirect = doOutputAndRedirect; this.expectedHttpResponseCode = expectedHttpResponseCode; } @@ -40,12 +44,12 @@ public class PostOpParam extends HttpOpP @Override public boolean getDoOutput() { - return true; + return doOutputAndRedirect; } @Override public boolean getRedirect() { - return true; + return doOutputAndRedirect; } @Override Modified: hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java?rev=1455883&r1=1455882&r2=1455883&view=diff 
============================================================================== --- hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java (original) +++ hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java Wed Mar 13 11:00:11 2013 @@ -28,10 +28,12 @@ import java.security.PrivilegedException import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSMainOperationsBaseTest; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.datanode.web.resources.DatanodeWebHdfsMethods; import org.apache.hadoop.hdfs.web.resources.ExceptionHandler; @@ -56,6 +58,7 @@ public class TestFSMainOperationsWebHdfs public static void setupCluster() { final Configuration conf = new Configuration(); conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true); + conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024); try { cluster = new MiniDFSCluster(conf, 2, true, null); cluster.waitActive(); @@ -97,6 +100,30 @@ public class TestFSMainOperationsWebHdfs return defaultWorkingDirectory; } + @Test + public void testConcat() throws Exception { + Path[] paths = {new Path("/test/hadoop/file1"), + new Path("/test/hadoop/file2"), + new Path("/test/hadoop/file3")}; + + DFSTestUtil.createFile(fSys, paths[0], 1024, (short) 3, 0); + DFSTestUtil.createFile(fSys, paths[1], 1024, (short) 3, 0); + DFSTestUtil.createFile(fSys, paths[2], 1024, (short) 3, 0); + + Path catPath = new Path("/test/hadoop/catFile"); + DFSTestUtil.createFile(fSys, catPath, 1024, (short) 3, 0); + Assert.assertTrue(exists(fSys, catPath)); + + fSys.concat(catPath, paths); + + 
Assert.assertFalse(exists(fSys, paths[0])); + Assert.assertFalse(exists(fSys, paths[1])); + Assert.assertFalse(exists(fSys, paths[2])); + + FileStatus fileStatus = fSys.getFileStatus(catPath); + Assert.assertEquals(1024*4, fileStatus.getLen()); + } + //copied from trunk. @Test public void testMkdirsFailsForSubdirectoryOfExistingFile() throws Exception {