Greetings,
Cassandra recently released 2.0; Mojo's Cassandra Maven Plugin is
currently based on 1.2.1-1.
Attached is a works-for-me patch to update the plugin to Cassandra 2.0.0.
Hopefully it's enough to send an email here; I didn't find any specific
contribution instructions, and the issue tracker seems to be locked down.
Cheers,
--
Eric Evans
[email protected]
Index: pom.xml
===================================================================
--- pom.xml (revision 18746)
+++ pom.xml (working copy)
@@ -30,7 +30,7 @@
</parent>
<artifactId>cassandra-maven-plugin</artifactId>
- <version>1.2.1-1</version>
+ <version>2.0.0</version>
<packaging>maven-plugin</packaging>
<name>Mojo's Cassandra Maven Plugin</name>
@@ -79,7 +79,7 @@
<properties>
<mavenVersion>2.2.1</mavenVersion>
- <cassandraVersion>1.2.1</cassandraVersion>
+ <cassandraVersion>2.0.0</cassandraVersion>
</properties>
<dependencies>
@@ -200,7 +200,7 @@
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
- <version>2.4.1</version>
+ <version>2.5</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
@@ -260,11 +260,11 @@
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
- <version>2.3.1</version>
+ <version>2.4</version>
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
- <version>2.5</version>
+ <version>2.6</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
Index: src/it/smoke/pom.xml
===================================================================
--- src/it/smoke/pom.xml (revision 18746)
+++ src/it/smoke/pom.xml (working copy)
@@ -53,7 +53,7 @@
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
- <version>2.4</version>
+ <version>2.5</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
@@ -73,11 +73,11 @@
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
- <version>2.3</version>
+ <version>2.4</version>
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
- <version>2.4.2</version>
+ <version>2.6</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
Index: src/it/spaces in path/pom.xml
===================================================================
--- src/it/spaces in path/pom.xml (revision 18746)
+++ src/it/spaces in path/pom.xml (working copy)
@@ -53,7 +53,7 @@
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
- <version>2.4</version>
+ <version>2.5</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
@@ -73,11 +73,11 @@
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
- <version>2.3</version>
+ <version>2.4</version>
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
- <version>2.4.2</version>
+ <version>2.6</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
Index: src/main/resources/cassandra.yaml
===================================================================
--- src/main/resources/cassandra.yaml (revision 18746)
+++ src/main/resources/cassandra.yaml (working copy)
@@ -21,27 +21,25 @@
#
# If you already have a cluster with 1 token per node, and wish to migrate to
# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
-# num_tokens: 256
+num_tokens: 256
-# If you haven't specified num_tokens, or have set it to the default of 1 then
-# you should always specify InitialToken when setting up a production
-# cluster for the first time, and often when adding capacity later.
-# The principle is that each node should be given an equal slice of
-# the token ring; see http://wiki.apache.org/cassandra/Operations
-# for more details.
-#
-# If blank, Cassandra will request a token bisecting the range of
-# the heaviest-loaded existing node. If there is no load information
-# available, such as is the case with a new cluster, it will pick
-# a random token, which will lead to hot spots.
-initial_token:
+# initial_token allows you to specify tokens manually. While you can use it with
+# vnodes (num_tokens > 1, above) -- in which case you should provide a
+# comma-separated list -- it's primarily used when adding nodes to legacy clusters
+# that do not have vnodes enabled.
+# initial_token:
# See http://wiki.apache.org/cassandra/HintedHandoff
hinted_handoff_enabled: true
# this defines the maximum amount of time a dead host will have hints
-# generated. After it has been dead this long, hints will be dropped.
+# generated. After it has been dead this long, new hints for it will not be
+# created until it has been seen alive and gone down again.
max_hint_window_in_ms: 10800000 # 3 hours
-# throttle in KB's per second, per delivery thread
+# Maximum throttle in KBs per second, per delivery thread. This will be
+# reduced proportionally to the number of nodes in the cluster. (If there
+# are two nodes in the cluster, each delivery thread will use the maximum
+# rate; if there are three, each will throttle to half of the maximum,
+# since we expect two nodes to be delivering hints simultaneously.)
hinted_handoff_throttle_in_kb: 1024
# Number of threads with which to deliver hints;
# Consider increasing this number when you have multi-dc deployments, since
@@ -53,12 +51,31 @@
# Defaults to: false
# populate_io_cache_on_flush: false
-# authentication backend, implementing IAuthenticator; used to identify users
-authenticator: org.apache.cassandra.auth.AllowAllAuthenticator
+# Authentication backend, implementing IAuthenticator; used to identify users
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator,
+# PasswordAuthenticator}.
+#
+# - AllowAllAuthenticator performs no checks - set it to disable authentication.
+# - PasswordAuthenticator relies on username/password pairs to authenticate
+# users. It keeps usernames and hashed passwords in system_auth.credentials table.
+# Please increase system_auth keyspace replication factor if you use this authenticator.
+authenticator: AllowAllAuthenticator
-# authorization backend, implementing IAuthorizer; used to limit access/provide permissions
-authorizer: org.apache.cassandra.auth.AllowAllAuthorizer
+# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer,
+# CassandraAuthorizer}.
+#
+# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
+# - CassandraAuthorizer stores permissions in system_auth.permissions table. Please
+# increase system_auth keyspace replication factor if you use this authorizer.
+authorizer: AllowAllAuthorizer
+# Validity period for permissions cache (fetching permissions can be an
+# expensive operation depending on the authorizer, CassandraAuthorizer is
+# one example). Defaults to 2000, set to 0 to disable.
+# Will be disabled automatically for AllowAllAuthorizer.
+permissions_validity_in_ms: 2000
+
# The partitioner is responsible for distributing rows (by key) across
# nodes in the cluster. Any IPartitioner may be used, including your
# own as long as it is on the classpath. Out of the box, Cassandra
@@ -75,14 +92,16 @@
# - OrderPreservingPartitioner is an obsolete form of BOP, that stores
# - keys in a less-efficient format and only works with keys that are
# UTF8-encoded Strings.
-# - CollatingOPP colates according to EN,US rules rather than lexical byte
+# - CollatingOPP collates according to EN,US rules rather than lexical byte
# ordering. Use this as an example if you need custom collation.
#
# See http://wiki.apache.org/cassandra/Operations for more on
# partitioners and token selection.
partitioner: org.apache.cassandra.dht.Murmur3Partitioner
-# directories where Cassandra should store data on disk.
+# Directories where Cassandra should store data on disk. Cassandra
+# will spread data evenly across them, subject to the granularity of
+# the configured compaction strategy.
data_file_directories:
- /var/lib/cassandra/data
@@ -91,7 +110,7 @@
# policy for data disk failures:
# stop: shut down gossip and Thrift, leaving the node effectively dead, but
-# still inspectable via JMX.
+# can still be inspected via JMX.
# best_effort: stop using the failed disk and respond to requests based on
# remaining available sstables. This means you WILL see obsolete
# data at CL.ONE!
@@ -103,8 +122,8 @@
# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
# minimum, sometimes more. The key cache is fairly tiny for the amount of
# time it saves, so it's worthwhile to use it at large numbers.
-# The row cache saves even more time, but must store the whole values of
-# its rows, so it is extremely space-intensive. It's best to only use the
+# The row cache saves even more time, but must contain the entire row,
+# so it is extremely space-intensive. It's best to only use the
# row cache if you have hot rows or static rows.
#
# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup.
@@ -113,7 +132,7 @@
key_cache_size_in_mb:
# Duration in seconds after which Cassandra should
-# safe the keys cache. Caches are saved to saved_caches_directory as
+# save the key cache. Caches are saved to saved_caches_directory as
# specified in this configuration file.
#
# Saved caches greatly improve cold-start speeds, and is relatively cheap in
@@ -148,21 +167,18 @@
# Disabled by default, meaning all keys are going to be saved
# row_cache_keys_to_save: 100
-# The provider for the row cache to use.
+# The off-heap memory allocator. Affects storage engine metadata as
+# well as caches. Experiments show that JEMalloc uses less memory
+# than the native GCC allocator (i.e., JEMalloc is more
+# fragmentation-resistant).
+#
+# Supported values are: NativeAllocator, JEMallocAllocator
#
-# Supported values are: ConcurrentLinkedHashCacheProvider, SerializingCacheProvider
+# If you intend to use JEMallocAllocator you have to install JEMalloc as library and
+# modify cassandra-env.sh as directed in the file.
#
-# SerializingCacheProvider serialises the contents of the row and stores
-# it in native memory, i.e., off the JVM Heap. Serialized rows take
-# significantly less memory than "live" rows in the JVM, so you can cache
-# more rows in a given memory footprint. And storing the cache off-heap
-# means you can use smaller heap sizes, reducing the impact of GC pauses.
-#
-# It is also valid to specify the fully-qualified class name to a class
-# that implements org.apache.cassandra.cache.IRowCacheProvider.
-#
-# Defaults to SerializingCacheProvider
-row_cache_provider: SerializingCacheProvider
+# Defaults to NativeAllocator
+# memory_allocator: NativeAllocator
# saved caches
saved_caches_directory: /var/lib/cassandra/saved_caches
@@ -184,7 +200,7 @@
# The size of the individual commitlog file segments. A commitlog
# segment may be archived, deleted, or recycled once all the data
-# in it (potentally from each columnfamily in the system) has been
+# in it (potentially from each columnfamily in the system) has been
# flushed to sstables.
#
# The default size is 32, which is almost always fine, but if you are
@@ -206,31 +222,6 @@
# Ex: "<ip1>,<ip2>,<ip3>"
- seeds: "127.0.0.1"
-# emergency pressure valve: each time heap usage after a full (CMS)
-# garbage collection is above this fraction of the max, Cassandra will
-# flush the largest memtables.
-#
-# Set to 1.0 to disable. Setting this lower than
-# CMSInitiatingOccupancyFraction is not likely to be useful.
-#
-# RELYING ON THIS AS YOUR PRIMARY TUNING MECHANISM WILL WORK POORLY:
-# it is most effective under light to moderate load, or read-heavy
-# workloads; under truly massive write load, it will often be too
-# little, too late.
-flush_largest_memtables_at: 0.75
-
-# emergency pressure valve #2: the first time heap usage after a full
-# (CMS) garbage collection is above this fraction of the max,
-# Cassandra will reduce cache maximum _capacity_ to the given fraction
-# of the current _size_. Should usually be set substantially above
-# flush_largest_memtables_at, since that will have less long-term
-# impact on the system.
-#
-# Set to 1.0 to disable. Setting this lower than
-# CMSInitiatingOccupancyFraction is not likely to be useful.
-reduce_cache_sizes_at: 0.85
-reduce_cache_capacity_to: 0.6
-
# For workloads with more data than can fit in memory, Cassandra's
# bottleneck will be reads that need to fetch data from
# disk. "concurrent_reads" should be set to (16 * number_of_drives) in
@@ -273,7 +264,7 @@
# Whether to, when doing sequential writing, fsync() at intervals in
# order to force the operating system to flush the dirty
# buffers. Enable this to avoid sudden dirty buffer flushing from
-# impacting read latencies. Almost always a good idea on SSD:s; not
+# impacting read latencies. Almost always a good idea on SSDs; not
# necessarily on platters.
trickle_fsync: false
trickle_fsync_interval_in_kb: 10240
@@ -290,7 +281,7 @@
# communicate!
#
# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
-# will always do the Right Thing *if* the node is properly configured
+# will always do the Right Thing _if_ the node is properly configured
# (hostname, name resolution, etc), and the Right Thing is to use the
# address associated with the hostname (it might not be).
#
@@ -301,31 +292,35 @@
# Leaving this blank will set it to the same value as listen_address
# broadcast_address: 1.2.3.4
+# Internode authentication backend, implementing IInternodeAuthenticator;
+# used to allow/disallow connections from peer nodes.
+# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator
# Whether to start the native transport server.
-# Currently, only the thrift server is started by default because the native
-# transport is considered beta.
# Please note that the address on which the native transport is bound is the
# same as the rpc_address. The port however is different and specified below.
-start_native_transport: false
+start_native_transport: true
# port for the CQL native transport to listen for clients on
native_transport_port: 9042
-# The minimum and maximum threads for handling requests when the native
-# transport is used. The meaning is those is similar to the one of
-# rpc_min_threads and rpc_max_threads, though the default differ slightly and
-# are the ones below:
-# native_transport_min_threads: 16
+# The maximum threads for handling requests when the native transport is used.
+# This is similar to rpc_max_threads though the default differs slightly (and
+# there is no native_transport_min_threads, idle threads will always be stopped
+# after 30 seconds).
# native_transport_max_threads: 128
-
# Whether to start the thrift rpc server.
start_rpc: true
-# The address to bind the Thrift RPC service to -- clients connect
-# here. Unlike ListenAddress above, you *can* specify 0.0.0.0 here if
-# you want Thrift to listen on all interfaces.
-#
+
+# The address to bind the Thrift RPC service and native transport
+# server -- clients connect here.
+#
# Leaving this blank has the same effect it does for ListenAddress,
# (i.e. it will be based on the configured hostname of the node).
+#
+# Note that unlike ListenAddress above, it is allowed to specify 0.0.0.0
+# here if you want to listen on all interfaces but is not best practice
+# as it is known to confuse the node auto-discovery features of some
+# client drivers.
rpc_address: localhost
# port for Thrift to listen for clients on
rpc_port: 9160
@@ -336,7 +331,7 @@
# Cassandra provides three out-of-the-box options for the RPC Server:
#
# sync -> One thread per thrift connection. For a very large number of clients, memory
-# will be your limiting factor. On a 64 bit JVM, 128KB is the minimum stack size
+# will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size
# per thread, and that will correspond to your use of virtual memory (but physical memory
# may be limited depending on use of stack space).
#
@@ -358,7 +353,7 @@
# RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync
# RPC server, it also dictates the number of clients that can be connected at all).
#
-# The default is unlimited and thus provide no protection against clients overwhelming the server. You are
+# The default is unlimited and thus provides no protection against clients overwhelming the server. You are
# encouraged to set a maximum that makes sense for you in production, but do keep in mind that
# rpc_max_threads represents the maximum number of client requests this server may execute concurrently.
#
@@ -369,16 +364,24 @@
# rpc_send_buff_size_in_bytes:
# rpc_recv_buff_size_in_bytes:
-# Frame size for thrift (maximum field length).
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.wmem_max
+# and when not setting it it is defined by net.ipv4.tcp_wmem
+# See:
+# /proc/sys/net/core/wmem_max
+# /proc/sys/net/core/rmem_max
+# /proc/sys/net/ipv4/tcp_wmem
+# /proc/sys/net/ipv4/tcp_rmem
+# and: man tcp
+# internode_send_buff_size_in_bytes:
+# internode_recv_buff_size_in_bytes:
+
+# Frame size for thrift (maximum message length).
thrift_framed_transport_size_in_mb: 15
-# The max length of a thrift message, including all fields and
-# internal thrift overhead.
-thrift_max_message_length_in_mb: 16
-
# Set to true to have Cassandra create a hard link to each sstable
# flushed or streamed locally in a backups/ subdirectory of the
-# Keyspace data. Removing these links is the operator's
+# keyspace data. Removing these links is the operator's
# responsibility.
incremental_backups: false
@@ -445,8 +448,8 @@
# given total throughput in Mbps. This is necessary because Cassandra does
# mostly sequential IO when streaming data during bootstrap or repair, which
# can lead to saturating the network connection and degrading rpc performance.
-# When unset, the default is 400 Mbps or 50 MB/s.
-# stream_throughput_outbound_megabits_per_sec: 400
+# When unset, the default is 200 Mbps or 25 MB/s.
+# stream_throughput_outbound_megabits_per_sec: 200
# How long the coordinator should wait for read operations to complete
read_request_timeout_in_ms: 10000
@@ -454,6 +457,9 @@
range_request_timeout_in_ms: 10000
# How long the coordinator should wait for writes to complete
write_request_timeout_in_ms: 10000
+# How long a coordinator should continue to retry a CAS operation
+# that contends with other proposals for the same row
+cas_contention_timeout_in_ms: 1000
# How long the coordinator should wait for truncates to complete
# (This can be much longer, because unless auto_snapshot is disabled
# we need to flush first so we can snapshot before removing the data.)
@@ -462,8 +468,10 @@
request_timeout_in_ms: 10000
# Enable operation timeout information exchange between nodes to accurately
-# measure request timeouts, If disabled cassandra will assuming the request
-# was forwarded to the replica instantly by the coordinator
+# measure request timeouts. If disabled, replicas will assume that requests
+# were forwarded to them instantly by the coordinator, which means that
+# under overload conditions we will waste that much extra time processing
+# already-timed-out requests.
#
# Warning: before enabling this property make sure to ntp is installed
# and the times are synchronized between the nodes.
@@ -471,7 +479,7 @@
# Enable socket timeout for streaming operation.
# When a timeout occurs during streaming, streaming is retried from the start
-# of the current file. This *can* involve re-streaming an important amount of
+# of the current file. This _can_ involve re-streaming an important amount of
# data, so you should avoid setting the value too low.
# Default value is 0, which never timeout streams.
# streaming_socket_timeout_in_ms: 0
@@ -514,9 +522,9 @@
# deployment conventions (as it did Facebook's), this is best used
# as an example of writing a custom Snitch class.
# - Ec2Snitch:
-# Appropriate for EC2 deployments in a single Region. Loads Region
+# Appropriate for EC2 deployments in a single Region. Loads Region
# and Availability Zone information from the EC2 API. The Region is
-# treated as the Datacenter, and the Availability Zone as the rack.
+# treated as the datacenter, and the Availability Zone as the rack.
# Only private IPs are used, so this will not work across multiple
# Regions.
# - Ec2MultiRegionSnitch:
@@ -582,22 +590,10 @@
# Keyspace1: 1
# Keyspace2: 5
-# request_scheduler_id -- An identifer based on which to perform
+# request_scheduler_id -- An identifier based on which to perform
# the request scheduling. Currently the only valid option is keyspace.
# request_scheduler_id: keyspace
-# index_interval controls the sampling of entries from the primrary
-# row index in terms of space versus time. The larger the interval,
-# the smaller and less effective the sampling will be. In technicial
-# terms, the interval coresponds to the number of index entries that
-# are skipped between taking each sample. All the sampled entries
-# must fit in memory. Generally, a value between 128 and 512 here
-# coupled with a large key cache size on CFs results in the best trade
-# offs. This value is not often changed, however if you have many
-# very small rows (many to an OS page), then increasing this will
-# often lower memory usage without a impact on performance.
-index_interval: 128
-
# Enable or disable inter-node encryption
# Default settings are TLS v1, RSA 1024-bit keys (it is imperative that
# users generate their own keys) TLS_RSA_WITH_AES_128_CBC_SHA as the cipher
@@ -623,12 +619,17 @@
# algorithm: SunX509
# store_type: JKS
# cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA]
+ # require_client_auth: false
# enable or disable client/server encryption.
client_encryption_options:
enabled: false
keystore: conf/.keystore
keystore_password: cassandra
+ # require_client_auth: false
+ # Set truststore and truststore_password if require_client_auth is true
+ # truststore: conf/.truststore
+ # truststore_password: cassandra
# More advanced defaults below:
# protocol: TLS
# algorithm: SunX509
@@ -641,3 +642,15 @@
# dc - traffic between different datacenters is compressed
# none - nothing is compressed.
internode_compression: all
+
+# Enable or disable tcp_nodelay for inter-dc communication.
+# Disabling it will result in larger (but fewer) network packets being sent,
+# reducing overhead from the TCP protocol itself, at the cost of increasing
+# latency if you block for cross-datacenter responses.
+inter_dc_tcp_nodelay: false
+
+# Enable or disable kernel page cache preheating from contents of the key cache after compaction.
+# When enabled it would preheat only the first "page" (4KB) of each row to optimize
+# for sequential access. Note: This could be harmful for fat rows, see CASSANDRA-4937
+# for further details on that topic.
+preheat_kernel_page_cache: false
---------------------------------------------------------------------
To unsubscribe from this list, please visit:
http://xircles.codehaus.org/manage_email