Author: gdusbabek Date: Tue Apr 27 21:24:54 2010 New Revision: 938658 URL: http://svn.apache.org/viewvc?rev=938658&view=rev Log: fix contrib/word_count build. Patch by Jeremy Hanna, reviewed by Gary Dusbabek. CASSANDRA-992.
Added: cassandra/branches/cassandra-0.6/contrib/word_count/ivy.xml Modified: cassandra/branches/cassandra-0.6/contrib/word_count/README.txt cassandra/branches/cassandra-0.6/contrib/word_count/bin/word_count cassandra/branches/cassandra-0.6/contrib/word_count/bin/word_count_setup cassandra/branches/cassandra-0.6/contrib/word_count/build.xml cassandra/branches/cassandra-0.6/contrib/word_count/storage-conf.xml Modified: cassandra/branches/cassandra-0.6/contrib/word_count/README.txt URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.6/contrib/word_count/README.txt?rev=938658&r1=938657&r2=938658&view=diff ============================================================================== --- cassandra/branches/cassandra-0.6/contrib/word_count/README.txt (original) +++ cassandra/branches/cassandra-0.6/contrib/word_count/README.txt Tue Apr 27 21:24:54 2010 @@ -16,3 +16,8 @@ Read the code in src/ for more details. *If you want to point wordcount at a real cluster, modify the seed and listenaddress settings in storage-conf.xml accordingly. + +*For Mac users, the storage-conf.xml uses 127.0.0.2 for the +word_count_setup. Mac OS X doesn't have that address available. +To add it, run this before running bin/word_count_setup: +sudo ifconfig lo0 alias 127.0.0.2 up Modified: cassandra/branches/cassandra-0.6/contrib/word_count/bin/word_count URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.6/contrib/word_count/bin/word_count?rev=938658&r1=938657&r2=938658&view=diff ============================================================================== --- cassandra/branches/cassandra-0.6/contrib/word_count/bin/word_count (original) +++ cassandra/branches/cassandra-0.6/contrib/word_count/bin/word_count Tue Apr 27 21:24:54 2010 @@ -25,13 +25,16 @@ if [ ! -d $cwd/../../../build/classes ]; fi # word_count Jar. -if [ ! -e $cwd/../build/*.jar ]; then +if [ ! -e $cwd/../build/word_count.jar ]; then echo "Unable to locate word_count jar" >&2 exit 1 fi -CLASSPATH=$CLASSPATH:`ls -1 $cwd/../build/*.jar` +CLASSPATH=$CLASSPATH:$cwd/../build/word_count.jar CLASSPATH=$CLASSPATH:.:$cwd/../../../build/classes +for jar in $cwd/../build/lib/jars/*.jar; do + CLASSPATH=$CLASSPATH:$jar +done for jar in $cwd/../../../lib/*.jar; do CLASSPATH=$CLASSPATH:$jar done @@ -50,4 +53,5 @@ if [ "x$JAVA" = "x" ]; then exit 1 fi +#echo $CLASSPATH $JAVA -Xmx1G -ea -cp $CLASSPATH WordCount Modified: cassandra/branches/cassandra-0.6/contrib/word_count/bin/word_count_setup URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.6/contrib/word_count/bin/word_count_setup?rev=938658&r1=938657&r2=938658&view=diff ============================================================================== --- cassandra/branches/cassandra-0.6/contrib/word_count/bin/word_count_setup (original) +++ cassandra/branches/cassandra-0.6/contrib/word_count/bin/word_count_setup Tue Apr 27 21:24:54 2010 @@ -25,13 +25,16 @@ if [ ! -d $cwd/../../../build/classes ]; fi # word_count Jar. -if [ ! -e $cwd/../build/*.jar ]; then +if [ ! -e $cwd/../build/word_count.jar ]; then echo "Unable to locate word_count jar" >&2 exit 1 fi -CLASSPATH=$CLASSPATH:`ls -1 $cwd/../build/*.jar` +CLASSPATH=$CLASSPATH:$cwd/../build/word_count.jar CLASSPATH=$CLASSPATH:.:$cwd/../../../build/classes +for jar in $cwd/../build/lib/jars/*.jar; do + CLASSPATH=$CLASSPATH:$jar +done for jar in $cwd/../../../lib/*.jar; do CLASSPATH=$CLASSPATH:$jar done Modified: cassandra/branches/cassandra-0.6/contrib/word_count/build.xml URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.6/contrib/word_count/build.xml?rev=938658&r1=938657&r2=938658&view=diff ============================================================================== --- cassandra/branches/cassandra-0.6/contrib/word_count/build.xml (original) +++ cassandra/branches/cassandra-0.6/contrib/word_count/build.xml Tue Apr 27 21:24:54 2010 @@ -17,39 +17,58 @@ ~ specific language governing permissions and limitations ~ under the License. --> -<project basedir="." default="jar" name="word_count"> +<project default="jar" name="word_count" xmlns:ivy="antlib:org.apache.ivy.ant"> <property name="cassandra.dir" value="../.." /> - <property name="cassandra.lib" value="" /> + <property name="cassandra.dir.lib" value="${cassandra.dir}/lib" /> <property name="cassandra.classes" value="${cassandra.dir}/build/classes" /> <property name="build.src" value="${basedir}/src" /> - <property name="build.out" value="${basedir}/build" /> - <property name="build.classes" value="${build.out}/classes" /> + <property name="build.dir" value="${basedir}/build" /> + <property name="ivy.lib.dir" value="${build.dir}/lib" /> + <property name="build.classes" value="${build.dir}/classes" /> <property name="final.name" value="word_count" /> + <property name="ivy.version" value="2.1.0" /> + <property name="ivy.url" + value="http://repo2.maven.org/maven2/org/apache/ivy/ivy" /> + + <condition property="ivy.jar.exists"> + <available file="${build.dir}/ivy-${ivy.version}.jar" /> + </condition> + + <path id="autoivy.classpath"> + <fileset dir="${ivy.lib.dir}"> + <include name="**/*.jar" /> + </fileset> + <pathelement location="${build.dir}/ivy-${ivy.version}.jar"/> + </path> + + <path id="wordcount.build.classpath"> + <fileset dir="${ivy.lib.dir}"> + <include name="**/*.jar" /> + </fileset> + <!-- cassandra dependencies --> + <fileset dir="${cassandra.dir.lib}"> + <include name="**/*.jar" /> + </fileset> + <fileset dir="${cassandra.dir}/build/lib/jars"> + <include name="**/*.jar" /> + </fileset> + <pathelement location="${cassandra.classes}" /> + </path> <target name="init"> <mkdir dir="${build.classes}" /> </target> - <target depends="init" name="build"> + <target depends="init,ivy-retrieve-build" name="build"> <javac destdir="${build.classes}"> <src path="${build.src}" /> - <classpath> - <path> - <fileset dir="${cassandra.dir}/lib"> - <include name="**/*.jar" /> - </fileset> - <fileset dir="${cassandra.dir}/build/lib/jars"> - <include name="**/*.jar" /> - </fileset> - <pathelement location="${cassandra.classes}" /> - </path> - </classpath> + <classpath refid="wordcount.build.classpath" /> </javac> </target> <target name="jar" depends="build"> <mkdir dir="${build.classes}/META-INF" /> - <jar jarfile="${build.out}/${final.name}.jar"> + <jar jarfile="${build.dir}/${final.name}.jar"> <fileset dir="${build.classes}" /> <fileset dir="${cassandra.classes}" /> <fileset dir="${cassandra.dir}"> @@ -63,6 +82,30 @@ </target> <target name="clean"> - <delete dir="${build.out}" /> + <delete dir="${build.dir}" /> + </target> + + <!-- + Ivy Specific targets + to fetch Ivy and this project's dependencies + --> + <target name="ivy-download" unless="ivy.jar.exists"> + <echo>Downloading Ivy...</echo> + <mkdir dir="${build.dir}" /> + <get src="${ivy.url}/${ivy.version}/ivy-${ivy.version}.jar" + dest="${build.dir}/ivy-${ivy.version}.jar" usetimestamp="true" /> + </target> + + <target name="ivy-init" depends="ivy-download" unless="ivy.initialized"> + <mkdir dir="${ivy.lib.dir}"/> + <taskdef resource="org/apache/ivy/ant/antlib.xml" + uri="antlib:org.apache.ivy.ant" + classpathref="autoivy.classpath"/> + <property name="ivy.initialized" value="true"/> + </target> + + <target name="ivy-retrieve-build" depends="ivy-init"> + <ivy:retrieve type="jar,source" sync="true" + pattern="${ivy.lib.dir}/[type]s/[artifact]-[revision].[ext]" /> </target> </project> Added: cassandra/branches/cassandra-0.6/contrib/word_count/ivy.xml URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.6/contrib/word_count/ivy.xml?rev=938658&view=auto ============================================================================== --- cassandra/branches/cassandra-0.6/contrib/word_count/ivy.xml (added) +++ cassandra/branches/cassandra-0.6/contrib/word_count/ivy.xml Tue Apr 27 21:24:54 2010 @@ -0,0 +1,24 @@ +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + --> +<ivy-module version="2.0"> + <info organisation="apache-cassandra" module="word-count"/> + <dependencies> + <dependency org="org.apache.hadoop" name="hadoop-core" rev="0.20.2"/> + </dependencies> +</ivy-module> \ No newline at end of file Modified: cassandra/branches/cassandra-0.6/contrib/word_count/storage-conf.xml URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.6/contrib/word_count/storage-conf.xml?rev=938658&r1=938657&r2=938658&view=diff ============================================================================== --- cassandra/branches/cassandra-0.6/contrib/word_count/storage-conf.xml (original) +++ cassandra/branches/cassandra-0.6/contrib/word_count/storage-conf.xml Tue Apr 27 21:24:54 2010 @@ -83,33 +83,38 @@ ~ An optional `Comment` attribute may be used to attach additional ~ human-readable information about the column family to its definition. ~ - ~ The optional KeysCachedFraction attribute specifies - ~ The fraction of keys per sstable whose locations we keep in + ~ The optional KeysCached attribute specifies + ~ the number of keys per sstable whose locations we keep in ~ memory in "mostly LRU" order. (JUST the key locations, NOT any - ~ column values.) The amount of memory used by the default setting of - ~ 0.01 is comparable to the amount used by the internal per-sstable key - ~ index. Consider increasing this if you have fewer, wider rows. - ~ Set to 0 to disable entirely. + ~ column values.) Specify a fraction (value less than 1), a percentage + ~ (ending in a % sign) or an absolute number of keys to cache. + ~ KeysCached defaults to 200000 keys. ~ ~ The optional RowsCached attribute specifies the number of rows - ~ whose entire contents we cache in memory, either as a fixed number - ~ of rows or as a percent of rows in the ColumnFamily. - ~ Do not use this on ColumnFamilies with large rows, or - ~ ColumnFamilies with high write:read ratios. As with key caching, - ~ valid values are from 0 to 1. The default 0 disables it entirely. + ~ whose entire contents we cache in memory. Do not use this on + ~ ColumnFamilies with large rows, or ColumnFamilies with high write:read + ~ ratios. Specify a fraction (value less than 1), a percentage (ending in + ~ a % sign) or an absolute number of rows to cache. + ~ RowsCached defaults to 0, i.e., row cache is off by default. + ~ + ~ Remember, when using caches as a percentage, they WILL grow with + ~ your data set! --> - <ColumnFamily CompareWith="BytesType" - Name="Standard1" - RowsCached="10%" - KeysCachedFraction="0"/> - <ColumnFamily CompareWith="UTF8Type" Name="Standard2"/> - <ColumnFamily CompareWith="TimeUUIDType" Name="StandardByUUID1"/> - <ColumnFamily ColumnType="Super" + <ColumnFamily Name="Standard1" CompareWith="BytesType"/> + <ColumnFamily Name="Standard2" + CompareWith="UTF8Type" + KeysCached="100%"/> + <ColumnFamily Name="StandardByUUID1" CompareWith="TimeUUIDType" /> + <ColumnFamily Name="Super1" + ColumnType="Super" + CompareWith="BytesType" + CompareSubcolumnsWith="BytesType" /> + <ColumnFamily Name="Super2" + ColumnType="Super" CompareWith="UTF8Type" CompareSubcolumnsWith="UTF8Type" - Name="Super1" - RowsCached="1000" - KeysCachedFraction="0" + RowsCached="10000" + KeysCached="50%" Comment="A column family with supercolumns, whose column and subcolumn names are UTF8 strings"/> <!-- @@ -134,6 +139,7 @@ ~ and PropertyFileEndPointSnitch is available in contrib/. --> <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch> + </Keyspace> </Keyspaces> @@ -188,8 +194,6 @@ <DataFileDirectories> <DataFileDirectory>/var/lib/cassandra/data</DataFileDirectory> </DataFileDirectories> - <CalloutLocation>/var/lib/cassandra/callouts</CalloutLocation> - <StagingFileDirectory>/var/lib/cassandra/staging</StagingFileDirectory> <!-- @@ -205,7 +209,7 @@ <!-- Miscellaneous --> <!-- Time to wait for a reply from other nodes before failing the command --> - <RpcTimeoutInMillis>5000</RpcTimeoutInMillis> + <RpcTimeoutInMillis>10000</RpcTimeoutInMillis> <!-- Size to allow commitlog to grow to before creating a new segment --> <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB> @@ -221,6 +225,7 @@ ~ (hostname, name resolution, etc), and the Right Thing is to use the ~ address associated with the hostname (it might not be). --> + <!--<ListenAddress>localhost</ListenAddress>--> <ListenAddress>127.0.0.2</ListenAddress> <!-- internal communications port --> <StoragePort>7000</StoragePort> @@ -261,6 +266,14 @@ <DiskAccessMode>auto</DiskAccessMode> <!-- + ~ Size of compacted row above which to log a warning. (If compacted + ~ rows do not fit in memory, Cassandra will crash. This is explained + ~ in http://wiki.apache.org/cassandra/CassandraLimitations and is + ~ scheduled to be fixed in 0.7.) + --> + <RowWarningThresholdInMB>512</RowWarningThresholdInMB> + + <!-- ~ Buffer size to use when performing contiguous column slices. Increase ~ this to the size of the column slices you typically perform. ~ (Name-based queries are performed with a buffer size of @@ -307,7 +320,7 @@ ~ ColumnFamily before flushing to disk. This is also a per-memtable ~ setting. Use with MemtableThroughputInMB to tune memory usage. --> - <MemtableOperationsInMillions>0.1</MemtableOperationsInMillions> + <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions> <!-- ~ The maximum time to leave a dirty memtable unflushed. ~ (While any affected columnfamilies have unflushed data from a @@ -337,7 +350,7 @@ ~ This is less necessary in Cassandra than in traditional databases ~ since replication reduces the odds of losing data from a failure ~ after writing the log entry but before it actually reaches the disk. - ~ So the other option is "timed," where writes may be acked immediately + ~ So the other option is "periodic," where writes may be acked immediately ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS ~ milliseconds. -->