Re: dynamic mapper?

2012-03-28 Thread madhu phatak
Hi,
 You can use Java APIs to compile custom Java code and create jars. For
example , look at this code from Sqoop

/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop.orm;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.jar.JarOutputStream;
import java.util.zip.ZipEntry;

import javax.tools.JavaCompiler;
import javax.tools.JavaFileObject;
import javax.tools.StandardJavaFileManager;
import javax.tools.ToolProvider;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.JobConf;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.util.FileListing;

import com.cloudera.sqoop.util.Jars;

/**
 * Manages the compilation of a bunch of .java files into .class files
 * and eventually a jar.
 *
 * Also embeds this program's jar into the lib/ directory inside the
 * compiled jar to ensure that the job runs correctly.
 */
public class CompilationManager {

  /** If we cannot infer a jar name from a table name, etc., use this. */
  public static final String DEFAULT_CODEGEN_JAR_NAME =
      "sqoop-codegen-created.jar";

  public static final Log LOG = LogFactory.getLog(
      CompilationManager.class.getName());

  // Sqoop configuration (hadoop home, output dirs, ...) for this run.
  private SqoopOptions options;
  // Paths of the .java source files queued for compilation.
  private List<String> sources;

  /**
   * Creates a compilation manager driven by the given Sqoop options.
   *
   * @param opts the configured Sqoop options for this code-gen run.
   */
  public CompilationManager(final SqoopOptions opts) {
    options = opts;
    sources = new ArrayList<String>();
  }

  /**
   * Queues a .java source file for the next compile() invocation.
   *
   * @param sourceName path of the source file to compile.
   */
  public void addSourceFile(String sourceName) {
    this.sources.add(sourceName);
  }

  /**
   * locate the hadoop-*-core.jar in $HADOOP_HOME or --hadoop-home.
   * If that doesn't work, check our classpath.
   * @return the filename of the hadoop-*-core.jar file.
   */
  private String findHadoopCoreJar() {
String hadoopHome = options.getHadoopHome();

if (null == hadoopHome) {
  LOG.info($HADOOP_HOME is not set);
  return Jars.getJarPathForClass(JobConf.class);
}

if (!hadoopHome.endsWith(File.separator)) {
  hadoopHome = hadoopHome + File.separator;
}

File hadoopHomeFile = new File(hadoopHome);
LOG.info(HADOOP_HOME is  + hadoopHomeFile.getAbsolutePath());
File [] entries = hadoopHomeFile.listFiles();

if (null == entries) {
  LOG.warn(HADOOP_HOME appears empty or missing);
  return Jars.getJarPathForClass(JobConf.class);
}

for (File f : entries) {
  if (f.getName().startsWith(hadoop-)
   f.getName().endsWith(-core.jar)) {
LOG.info(Found hadoop core jar at:  + f.getAbsolutePath());
return f.getAbsolutePath();
  }
}

return Jars.getJarPathForClass(JobConf.class);
  }

  /**
   * Compile the .java files into .class files via embedded javac call.
   * On success, move .java files to the code output dir.
   */
  public void compile() throws IOException {
ListString args = new ArrayListString();

// ensure that the jar output dir exists.
String jarOutDir = options.getJarOutputDir();
File jarOutDirObj = new File(jarOutDir);
if (!jarOutDirObj.exists()) {
  boolean mkdirSuccess = jarOutDirObj.mkdirs();
  if (!mkdirSuccess) {
LOG.debug(Warning: Could not make directories for  + jarOutDir);
  }
} else if (LOG.isDebugEnabled()) {
  LOG.debug(Found existing  + jarOutDir);
}

// Make sure jarOutDir ends with a '/'.
if (!jarOutDir.endsWith(File.separator)) {
  jarOutDir = jarOutDir + File.separator;
}

// find hadoop-*-core.jar for classpath.
String coreJar = findHadoopCoreJar();
if (null == coreJar) {
  // Couldn't find a core jar to insert into the CP for compilation. If,
  // however, we're running this from a unit test, then the path to the
  // .class files might be set via the hadoop.alt.classpath property
  // instead. Check there first.
  String coreClassesPath = System.getProperty(hadoop.alt.classpath);
  if (null == coreClassesPath) {
// no -- we're out of options. Fail.
throw new IOException(Could not find hadoop core jar!);
  } else {
coreJar = coreClassesPath;
  }
}

// find sqoop jar for compilation 

dynamic mapper?

2012-03-14 Thread robert
Suppose I want to generate a mapper class at run time and use that
class in my MapReduce job.

What is the best way to do this? Would I just have an extra scripted
step to pre-compile it and distribute with -libjars, or if I felt like
compiling it dynamically with for example JavaCompiler is there some
elegant way to distribute the class at run time?