[ https://issues.apache.org/jira/browse/MAPREDUCE-7376?focusedWorklogId=765913&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-765913 ]
ASF GitHub Bot logged work on MAPREDUCE-7376: --------------------------------------------- Author: ASF GitHub Bot Created on: 04/May/22 10:37 Start Date: 04/May/22 10:37 Worklog Time Spent: 10m Work Description: ayushtkn commented on code in PR #4257: URL: https://github.com/apache/hadoop/pull/4257#discussion_r864687143 ########## hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestAggregateWordCount.java: ########## @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.examples; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; +import java.security.Permission; + +import org.junit.After; +import org.junit.Test; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.HadoopTestCase; +import org.apache.hadoop.util.ExitUtil.ExitException; + +import static org.junit.Assert.assertEquals; + +public class TestAggregateWordCount extends HadoopTestCase { + public TestAggregateWordCount() throws IOException { + super(LOCAL_MR, LOCAL_FS, 1, 1); + } + + @After + public void tearDown() throws Exception { + getFileSystem().delete(TEST_DIR, true); + super.tearDown(); + } + + // Input/Output paths for sort + private static final Path TEST_DIR = new Path( + new File(System.getProperty("test.build.data", "/tmp"), + "aggregatewordcount").getAbsoluteFile().toURI().toString()); + + private static final Path INPUT_PATH = new Path(TEST_DIR, "inPath"); + private static final Path OUTPUT_PATH = new Path(TEST_DIR, "outPath"); + + @Test + public void testAggregateTestCount() + throws IOException, ClassNotFoundException, InterruptedException { + SecurityManager securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + try { + FileSystem fs = getFileSystem(); + fs.mkdirs(INPUT_PATH); + Path file1 = new Path(INPUT_PATH, "file1"); + Path file2 = new Path(INPUT_PATH, "file2"); + FileUtil.write(fs, file1, "Hello World"); + FileUtil.write(fs, file2, "Hello Hadoop"); + + String[] args = + new String[] {INPUT_PATH.toString(), OUTPUT_PATH.toString(), "1", + "textinputformat"}; + + // Run AggregateWordCount Job. 
+ try { + AggregateWordCount.main(args); + } catch (ExitException e) { + // Ignore + } + + String allEntries; + try (FSDataInputStream stream = fs + .open(new Path(OUTPUT_PATH, "part-r-00000"));) { + allEntries = IOUtils.toString(stream, Charset.defaultCharset()); + } + + assertEquals("Hadoop\t1\n" + "Hello\t2\n" + "World\t1\n", allEntries); Review Comment: Yes, it is always sorted; that is what the Javadoc of the job says: ``` This is an example Aggregated Hadoop Map/Reduce application. It reads the text input files, breaks each line into words and counts them. **The output is a locally sorted list of words and the count of how often they occurred.** ``` Issue Time Tracking ------------------- Worklog Id: (was: 765913) Time Spent: 1h (was: 50m) > AggregateWordCount fetches wrong results > ---------------------------------------- > > Key: MAPREDUCE-7376 > URL: https://issues.apache.org/jira/browse/MAPREDUCE-7376 > Project: Hadoop Map/Reduce > Issue Type: Bug > Reporter: Ayush Saxena > Assignee: Ayush Saxena > Priority: Major > Labels: pull-request-available > Time Spent: 1h > Remaining Estimate: 0h > > AggregateWordCount, rather than counting the words, gives a single-line > output counting the number of rows. > Wrong Result Looks Like: > {noformat} > hadoop-3.4.0-SNAPSHOT % bin/hdfs dfs -cat /testOut1/part-r-00000 > record_count 2 > {noformat} > Correct Result Should Look Like: > {noformat} > hadoop-3.4.0-SNAPSHOT % bin/hdfs dfs -cat /testOut1/part-r-00000 > > Bye 1 > Goodbye 1 > Hadoop 2 > Hello 2 > World 2 > {noformat} -- This message was sent by Atlassian Jira (v8.20.7#820007) --------------------------------------------------------------------- To unsubscribe, e-mail: mapreduce-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: mapreduce-issues-h...@hadoop.apache.org