[4/4] cassandra git commit: Introduce role based access control

2015-01-14 Thread aleksey
Introduce role based access control

patch by Sam Tunnicliffe; reviewed by Aleksey Yeschenko for
CASSANDRA-7653


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/879b694d
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/879b694d
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/879b694d

Branch: refs/heads/trunk
Commit: 879b694d346e6442c9508d8d8a48e6e71fbcd25b
Parents: c65a9f5
Author: Sam Tunnicliffe s...@beobal.com
Authored: Wed Jan 14 22:48:39 2015 +0300
Committer: Aleksey Yeschenko alek...@apache.org
Committed: Wed Jan 14 22:48:39 2015 +0300

--
 CHANGES.txt |   1 +
 NEWS.txt|  21 +
 bin/cqlsh   |  36 +-
 conf/cassandra.yaml |  20 +
 pylib/cqlshlib/cql3handling.py  |  62 +-
 pylib/cqlshlib/helptopics.py|  66 ++-
 .../cassandra/auth/AllowAllAuthenticator.java   |  44 +-
 src/java/org/apache/cassandra/auth/Auth.java| 298 --
 .../org/apache/cassandra/auth/AuthKeyspace.java |  90 +++
 .../cassandra/auth/AuthMigrationListener.java   |  37 ++
 .../cassandra/auth/AuthenticatedUser.java   | 115 +++-
 .../cassandra/auth/CassandraAuthorizer.java | 405 +
 .../cassandra/auth/CassandraRoleManager.java| 586 +++
 .../org/apache/cassandra/auth/DataResource.java |  58 +-
 .../apache/cassandra/auth/IAuthenticator.java   | 142 ++---
 .../org/apache/cassandra/auth/IAuthorizer.java  |  39 +-
 .../org/apache/cassandra/auth/IRoleManager.java | 200 +++
 .../cassandra/auth/ISaslAwareAuthenticator.java |  41 --
 .../cassandra/auth/LegacyAuthenticator.java |  94 ---
 .../apache/cassandra/auth/LegacyAuthorizer.java | 114 
 .../cassandra/auth/PasswordAuthenticator.java   | 255 +++-
 .../cassandra/auth/PermissionDetails.java   |  16 +-
 .../org/apache/cassandra/config/Config.java |   7 +-
 .../cassandra/config/DatabaseDescriptor.java|  20 +-
 src/java/org/apache/cassandra/cql3/Cql.g| 179 +-
 .../org/apache/cassandra/cql3/RoleName.java |  41 ++
 .../org/apache/cassandra/cql3/RoleOptions.java  |  62 ++
 .../org/apache/cassandra/cql3/UserOptions.java  |  62 --
 .../cql3/statements/AlterRoleStatement.java |  84 +++
 .../cql3/statements/AlterUserStatement.java |  92 ---
 .../cql3/statements/AuthorizationStatement.java |   4 +-
 .../cql3/statements/CreateRoleStatement.java|  76 +++
 .../cql3/statements/CreateUserStatement.java|  75 ---
 .../cql3/statements/DropRoleStatement.java  |  68 +++
 .../cql3/statements/DropUserStatement.java  |  72 ---
 .../cql3/statements/GrantRoleStatement.java |  39 ++
 .../cql3/statements/GrantStatement.java |  22 +-
 .../statements/ListPermissionsStatement.java|  42 +-
 .../cql3/statements/ListRolesStatement.java | 118 
 .../cql3/statements/ListUsersStatement.java |  52 +-
 .../statements/PermissionAlteringStatement.java |  16 +-
 .../cql3/statements/RevokeRoleStatement.java|  40 ++
 .../cql3/statements/RevokeStatement.java|  22 +-
 .../statements/RoleManagementStatement.java |  54 ++
 .../hadoop/AbstractBulkRecordWriter.java|  25 +-
 .../hadoop/AbstractColumnFamilyInputFormat.java |  28 +-
 .../AbstractColumnFamilyOutputFormat.java   |   9 +-
 .../hadoop/pig/AbstractCassandraStorage.java|  33 +-
 .../apache/cassandra/service/ClientState.java   |  50 +-
 .../cassandra/service/StorageService.java   |  71 ++-
 .../cassandra/thrift/CassandraServer.java   |  28 +-
 .../org/apache/cassandra/tools/BulkLoader.java  |  20 +-
 .../org/apache/cassandra/transport/Client.java  |  15 +-
 .../org/apache/cassandra/transport/Server.java  |  30 +-
 .../cassandra/transport/ServerConnection.java   |  20 +-
 .../transport/messages/AuthResponse.java|  25 +-
 .../transport/messages/CredentialsMessage.java  |  10 +-
 .../org/apache/cassandra/utils/FBUtilities.java |  24 +-
 58 files changed, 2766 insertions(+), 1609 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/879b694d/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index d80eeaf..30a741e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.0
+ * Add role based access control (CASSANDRA-7653)
  * Group sstables for anticompaction correctly (CASSANDRA-8578)
  * Add ReadFailureException to native protocol, respond
immediately when replicas encounter errors while handling

http://git-wip-us.apache.org/repos/asf/cassandra/blob/879b694d/NEWS.txt
--
diff --git a/NEWS.txt b/NEWS.txt
index 8d8ebdc..b9c4173 100644

[3/4] cassandra git commit: Introduce role based access control

2015-01-14 Thread aleksey
http://git-wip-us.apache.org/repos/asf/cassandra/blob/879b694d/src/java/org/apache/cassandra/auth/CassandraRoleManager.java
--
diff --git a/src/java/org/apache/cassandra/auth/CassandraRoleManager.java 
b/src/java/org/apache/cassandra/auth/CassandraRoleManager.java
new file mode 100644
index 000..34feb22
--- /dev/null
+++ b/src/java/org/apache/cassandra/auth/CassandraRoleManager.java
@@ -0,0 +1,586 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.auth;
+
+import java.util.*;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.base.*;
+import com.google.common.base.Objects;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.concurrent.ScheduledExecutors;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.config.Schema;
+import org.apache.cassandra.cql3.*;
+import org.apache.cassandra.cql3.statements.SelectStatement;
+import org.apache.cassandra.db.ConsistencyLevel;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.exceptions.*;
+import org.apache.cassandra.service.QueryState;
+import org.apache.cassandra.transport.messages.ResultMessage;
+import org.apache.cassandra.utils.ByteBufferUtil;
+import org.mindrot.jbcrypt.BCrypt;
+
+/**
+ * Responsible for the creation, maintenance and deletion of roles
+ * for the purposes of authentication and authorization.
+ * Role data is stored internally, using the roles and role_members tables
+ * in the system_auth keyspace.
+ *
+ * Additionally, if org.apache.cassandra.auth.PasswordAuthenticator is used,
+ * encrypted passwords are also stored in the system_auth.roles table. This
+ * coupling between the IAuthenticator and IRoleManager implementations exists
+ * because setting a role's password via CQL is done with a CREATE ROLE or
+ * ALTER ROLE statement, the processing of which is handled by IRoleManager.
+ * As IAuthenticator is concerned only with credentials checking and has no
+ * means to modify passwords, PasswordAuthenticator depends on
+ * CassandraRoleManager for those functions.
+ *
+ * Alternative IAuthenticator implementations may be used in conjunction with
+ * CassandraRoleManager, but WITH PASSWORD = 'password' will not be supported
+ * in CREATE/ALTER ROLE statements.
+ *
+ * Such a configuration could be implemented using a custom IRoleManager that
+ * extends CassandraRoleManager and which includes Option.PASSWORD in the
+ * Set<Option> returned from supportedOptions/alterableOptions. Any additional processing
+ * of the password itself (such as storing it in an alternative location) would
+ * be added in overridden createRole and alterRole implementations.
+ */
+public class CassandraRoleManager implements IRoleManager
+{
+private static final Logger logger = 
LoggerFactory.getLogger(CassandraRoleManager.class);
+
+static final String DEFAULT_SUPERUSER_NAME = "cassandra";
+static final String DEFAULT_SUPERUSER_PASSWORD = "cassandra";
+
+// Transform a row in the AuthKeyspace.ROLES to a Role instance
+private static final FunctionUntypedResultSet.Row, Role ROW_TO_ROLE = 
new FunctionUntypedResultSet.Row, Role()
+{
+public Role apply(UntypedResultSet.Row row)
+{
+return new Role(row.getString(role),
+row.getBoolean(is_superuser),
+row.getBoolean(can_login),
+row.has(member_of) ? row.getSet(member_of, 
UTF8Type.instance)
+ : 
Collections.StringemptySet());
+}
+};
+
+public static final String LEGACY_USERS_TABLE = "users";
+// Transform a row in the legacy system_auth.users table to a Role 
instance,
+// used to fallback to previous schema on a mixed cluster during an upgrade
+private static final FunctionUntypedResultSet.Row, Role 
LEGACY_ROW_TO_ROLE = new FunctionUntypedResultSet.Row, Role()
+{
+public Role 

[1/4] cassandra git commit: Introduce role based access control

2015-01-14 Thread aleksey
Repository: cassandra
Updated Branches:
  refs/heads/trunk c65a9f5c6 -> 879b694d3


http://git-wip-us.apache.org/repos/asf/cassandra/blob/879b694d/src/java/org/apache/cassandra/service/ClientState.java
--
diff --git a/src/java/org/apache/cassandra/service/ClientState.java 
b/src/java/org/apache/cassandra/service/ClientState.java
index 36f8326..21d10f9 100644
--- a/src/java/org/apache/cassandra/service/ClientState.java
+++ b/src/java/org/apache/cassandra/service/ClientState.java
@@ -18,11 +18,12 @@
 package org.apache.cassandra.service;
 
 import java.net.SocketAddress;
-import java.util.*;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicLong;
 
 import com.google.common.collect.Iterables;
-import com.google.common.collect.Sets;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -31,13 +32,13 @@ import org.apache.cassandra.config.DatabaseDescriptor;
 import org.apache.cassandra.config.Schema;
 import org.apache.cassandra.cql3.QueryHandler;
 import org.apache.cassandra.cql3.QueryProcessor;
-import org.apache.cassandra.schema.LegacySchemaTables;
 import org.apache.cassandra.db.SystemKeyspace;
 import org.apache.cassandra.exceptions.AuthenticationException;
 import org.apache.cassandra.exceptions.InvalidRequestException;
 import org.apache.cassandra.exceptions.UnauthorizedException;
-import org.apache.cassandra.tracing.TraceKeyspace;
+import org.apache.cassandra.schema.LegacySchemaTables;
 import org.apache.cassandra.thrift.ThriftValidation;
+import org.apache.cassandra.tracing.TraceKeyspace;
 import org.apache.cassandra.utils.FBUtilities;
 import org.apache.cassandra.utils.JVMStabilityInspector;
 import org.apache.cassandra.utils.SemanticVersion;
@@ -52,16 +53,27 @@ public class ClientState
 
 private static final SetIResource READABLE_SYSTEM_RESOURCES = new 
HashSet();
 private static final SetIResource PROTECTED_AUTH_RESOURCES = new 
HashSet();
-
+private static final SetString ALTERABLE_SYSTEM_KEYSPACES = new 
HashSet();
+private static final SetIResource DROPPABLE_SYSTEM_TABLES = new 
HashSet();
 static
 {
 // We want these system cfs to be always readable to authenticated 
users since many tools rely on them
 // (nodetool, cqlsh, bulkloader, etc.)
 for (String cf : Iterables.concat(Arrays.asList(SystemKeyspace.LOCAL, 
SystemKeyspace.PEERS), LegacySchemaTables.ALL))
-
READABLE_SYSTEM_RESOURCES.add(DataResource.columnFamily(SystemKeyspace.NAME, 
cf));
+
READABLE_SYSTEM_RESOURCES.add(DataResource.table(SystemKeyspace.NAME, cf));
 
 
PROTECTED_AUTH_RESOURCES.addAll(DatabaseDescriptor.getAuthenticator().protectedResources());
 
PROTECTED_AUTH_RESOURCES.addAll(DatabaseDescriptor.getAuthorizer().protectedResources());
+
PROTECTED_AUTH_RESOURCES.addAll(DatabaseDescriptor.getRoleManager().protectedResources());
+
+// allow users with sufficient privileges to alter KS level options on 
AUTH_KS and
+// TRACING_KS, and also to drop legacy tables (users, credentials, 
permissions) from
+// AUTH_KS
+ALTERABLE_SYSTEM_KEYSPACES.add(AuthKeyspace.NAME);
+ALTERABLE_SYSTEM_KEYSPACES.add(TraceKeyspace.NAME);
+DROPPABLE_SYSTEM_TABLES.add(DataResource.table(AuthKeyspace.NAME, 
PasswordAuthenticator.LEGACY_CREDENTIALS_TABLE));
+DROPPABLE_SYSTEM_TABLES.add(DataResource.table(AuthKeyspace.NAME, 
CassandraRoleManager.LEGACY_USERS_TABLE));
+DROPPABLE_SYSTEM_TABLES.add(DataResource.table(AuthKeyspace.NAME, 
CassandraAuthorizer.USER_PERMISSIONS));
 }
 
 // Current user for the session
@@ -200,10 +212,13 @@ public class ClientState
  */
 public void login(AuthenticatedUser user) throws AuthenticationException
 {
-if (!user.isAnonymous()  !Auth.isExistingUser(user.getName()))
-   throw new AuthenticationException(String.format(User %s doesn't 
exist - create it with CREATE USER query first,
-   user.getName()));
-this.user = user;
+// Login privilege is not inherited via granted roles, so just
+// verify that the role with the credentials that were actually
+// supplied has it
+if (user.isAnonymous() || 
DatabaseDescriptor.getRoleManager().canLogin(user.getName()))
+this.user = user;
+else
+throw new AuthenticationException(String.format("%s is not 
permitted to log in", user.getName()));
 }
 
 public void hasAllKeyspacesAccess(Permission perm) throws 
UnauthorizedException
@@ -223,7 +238,7 @@ public class ClientState
 throws UnauthorizedException, InvalidRequestException
 {
 ThriftValidation.validateColumnFamily(keyspace, columnFamily);
-hasAccess(keyspace, perm, DataResource.columnFamily(keyspace, 
columnFamily));
+

[2/4] cassandra git commit: Introduce role based access control

2015-01-14 Thread aleksey
http://git-wip-us.apache.org/repos/asf/cassandra/blob/879b694d/src/java/org/apache/cassandra/config/Config.java
--
diff --git a/src/java/org/apache/cassandra/config/Config.java 
b/src/java/org/apache/cassandra/config/Config.java
index 15e7203..33d2bb2 100644
--- a/src/java/org/apache/cassandra/config/Config.java
+++ b/src/java/org/apache/cassandra/config/Config.java
@@ -19,20 +19,19 @@ package org.apache.cassandra.config;
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.sql.Time;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
 import com.google.common.collect.Sets;
-import org.supercsv.io.CsvListReader;
-import org.supercsv.prefs.CsvPreference;
 
 import org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions;
 import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions;
 import org.apache.cassandra.exceptions.ConfigurationException;
 import org.apache.cassandra.io.util.NativeAllocator;
 import org.apache.cassandra.utils.FBUtilities;
+import org.supercsv.io.CsvListReader;
+import org.supercsv.prefs.CsvPreference;
 
 /**
  * A class that contains configuration properties for the cassandra node it 
runs within.
@@ -44,9 +43,11 @@ public class Config
 public String cluster_name = "Test Cluster";
 public String authenticator;
 public String authorizer;
+public String role_manager;
 public int permissions_validity_in_ms = 2000;
 public int permissions_cache_max_entries = 1000;
 public int permissions_update_interval_in_ms = -1;
+public int roles_validity_in_ms = 2000;
 
 /* Hashing strategy Random or OPHF */
 public String partitioner;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/879b694d/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
--
diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java 
b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
index 9780db7..6d626da 100644
--- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
+++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
@@ -81,6 +81,7 @@ public class DatabaseDescriptor
 
 private static IAuthenticator authenticator = new AllowAllAuthenticator();
 private static IAuthorizer authorizer = new AllowAllAuthorizer();
+private static IRoleManager roleManager = new CassandraRoleManager();
 
 private static IRequestScheduler requestScheduler;
 private static RequestSchedulerId requestSchedulerId;
@@ -184,7 +185,7 @@ public class DatabaseDescriptor
 }
 }
 
-/* Authentication and authorization backend, implementing 
IAuthenticator and IAuthorizer */
+/* Authentication, authorization and role management backend, 
implementing IAuthenticator, IAuthorizer  IRoleMapper*/
 if (conf.authenticator != null)
 authenticator = FBUtilities.newAuthenticator(conf.authenticator);
 
@@ -194,6 +195,12 @@ public class DatabaseDescriptor
+if (authenticator instanceof AllowAllAuthenticator && !(authorizer 
instanceof AllowAllAuthorizer))
+throw new ConfigurationException("AllowAllAuthenticator can't be 
used with " + conf.authorizer);
 
+if (conf.role_manager != null)
+roleManager = FBUtilities.newRoleManager(conf.role_manager);
+
+if (authenticator instanceof PasswordAuthenticator && !(roleManager 
instanceof CassandraRoleManager))
+throw new ConfigurationException("CassandraRoleManager must be 
used with PasswordAuthenticator");
+
 if (conf.internode_authenticator != null)
 internodeAuthenticator = 
FBUtilities.construct(conf.internode_authenticator, internode_authenticator);
 else
@@ -201,6 +208,7 @@ public class DatabaseDescriptor
 
 authenticator.validateConfiguration();
 authorizer.validateConfiguration();
+roleManager.validateConfiguration();
 internodeAuthenticator.validateConfiguration();
 
 /* Hashing strategy */
@@ -604,6 +612,11 @@ public class DatabaseDescriptor
 return authorizer;
 }
 
+public static IRoleManager getRoleManager()
+{
+return roleManager;
+}
+
 public static int getPermissionsValidity()
 {
 return conf.permissions_validity_in_ms;
@@ -621,6 +634,11 @@ public class DatabaseDescriptor
  : conf.permissions_update_interval_in_ms;
 }
 
+public static int getRolesValidity()
+{
+return conf.roles_validity_in_ms;
+}
+
 public static int getThriftFramedTransportSize()
 {
 return conf.thrift_framed_transport_size_in_mb * 1024 * 1024;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/879b694d/src/java/org/apache/cassandra/cql3/Cql.g
--
diff --git 

[jira] [Commented] (CASSANDRA-7032) Improve vnode allocation

2015-01-14 Thread Branimir Lambov (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-7032?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277652#comment-14277652
 ] 

Branimir Lambov commented on CASSANDRA-7032:


No, I have not tried asymmetric configs yet, but I don't foresee significant 
problems adapting the method to them. Assuming configuration is done by 
specifying a number of vnodes in the node it should be just a matter of setting 
the optimal size based on that number; of course it remains to be seen if that 
won't cause some weird behaviour.

The speed problem is now mostly solved, I'll post a new version soon and try 
that out. 

 Improve vnode allocation
 

 Key: CASSANDRA-7032
 URL: https://issues.apache.org/jira/browse/CASSANDRA-7032
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Benedict
Assignee: Branimir Lambov
  Labels: performance, vnodes
 Fix For: 3.0

 Attachments: TestVNodeAllocation.java, TestVNodeAllocation.java, 
 TestVNodeAllocation.java, TestVNodeAllocation.java


 It's been known for a little while that random vnode allocation causes 
 hotspots of ownership. It should be possible to improve dramatically on this 
 with deterministic allocation. I have quickly thrown together a simple greedy 
 algorithm that allocates vnodes efficiently, and will repair hotspots in a 
 randomly allocated cluster gradually as more nodes are added, and also 
 ensures that token ranges are fairly evenly spread between nodes (somewhat 
 tunably so). The allocation still permits slight discrepancies in ownership, 
 but it is bound by the inverse of the size of the cluster (as opposed to 
 random allocation, which strangely gets worse as the cluster size increases). 
 I'm sure there is a decent dynamic programming solution to this that would be 
 even better.
 If on joining the ring a new node were to CAS a shared table where a 
 canonical allocation of token ranges lives after running this (or a similar) 
 algorithm, we could then get guaranteed bounds on the ownership distribution 
 in a cluster. This will also help for CASSANDRA-6696.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-6809) Compressed Commit Log

2015-01-14 Thread Branimir Lambov (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-6809?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277638#comment-14277638
 ] 

Branimir Lambov commented on CASSANDRA-6809:


Thanks for the review, Ariel. The plan was to implement compression without 
introducing too much extra complexity thus I've stayed away from adding new 
queues and stages dedicated to compression. Even so, a pipeline that achieves 
the highest possible throughput is quite doable by simply using more than one 
sync thread, and the size of the compression window is easily controlled via 
the sync period. Since we don't have an incremental compression mechanism, 
compression necessarily has to happen at the end, i.e. when the whole of the 
set of mutations to compress has been written to the buffer (this is usually 
not at the end of the segment).

CASSANDRA-7075 is valuable on its own right. Arguably RAID 0 is not good enough 
in either performance or reliability. None of the extra complexity we introduce 
there is made necessary by compression-related concerns, but one of the side 
effects of it is the availability of more than one sync thread for compression. 
It is a solution of sorts to the lack of CPU saturation from this patch, but it 
is not at all the only way to achieve it.

This code was written before ByteBuffer compression was made available; I will 
definitely make use of that now, but I wonder if  that should not be a separate 
patch so that we don't have to block on/conflict with Jake's patch.

The playback tests are in the various RecoveryManagerTests in o.a.c.db; the 
tests are the same for the uncompressed (test/testold target) and compressed 
case (test-compressed target). For performance tests the ultimate measure is 
cassandra-stress; ComitLogStress is a simple microbenchmark of how much we can 
push that favors compression -- make sure to run it with periodic rather than 
batch sync. A latency test is probably needed for batch mode; this probably 
needs some changes to the service to make sure the sync period can go low 
enough for the write latency to show up. 

I have not looked at the other suggestions yet; I have to switch modes from the 
quite different node allocation work-- give me a couple of days.

 Compressed Commit Log
 -

 Key: CASSANDRA-6809
 URL: https://issues.apache.org/jira/browse/CASSANDRA-6809
 Project: Cassandra
  Issue Type: Improvement
Reporter: Benedict
Assignee: Branimir Lambov
Priority: Minor
  Labels: performance
 Fix For: 3.0

 Attachments: logtest.txt


 It seems an unnecessary oversight that we don't compress the commit log. 
 Doing so should improve throughput, but some care will need to be taken to 
 ensure we use as much of a segment as possible. I propose decoupling the 
 writing of the records from the segments. Basically write into a (queue of) 
 DirectByteBuffer, and have the sync thread compress, say, ~64K chunks every X 
 MB written to the CL (where X is ordinarily CLS size), and then pack as many 
 of the compressed chunks into a CLS as possible.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Issue Comment Deleted] (CASSANDRA-8577) Values of set types not loading correctly into Pig

2015-01-14 Thread Alex Liu (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8577?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Alex Liu updated CASSANDRA-8577:

Comment: was deleted

(was: duplicate of CASSANDRA-8577)

 Values of set types not loading correctly into Pig
 --

 Key: CASSANDRA-8577
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8577
 Project: Cassandra
  Issue Type: Bug
Reporter: Oksana Danylyshyn
Assignee: Artem Aliev
 Fix For: 2.1.3

 Attachments: cassandra-2.1-8577.txt


 Values of set types are not loading correctly from Cassandra (cql3 table, 
 Native protocol v3) into Pig using CqlNativeStorage. 
 When using Cassandra version 2.1.0 only empty values are loaded, and for 
 newer versions (2.1.1 and 2.1.2) the following error is received: 
 org.apache.cassandra.serializers.MarshalException: Unexpected extraneous 
 bytes after set value
 at 
 org.apache.cassandra.serializers.SetSerializer.deserializeForNativeProtocol(SetSerializer.java:94)
 Steps to reproduce:
 {code}cqlsh:socialdata CREATE TABLE test (
  key varchar PRIMARY KEY,
  tags set<varchar>
);
 cqlsh:socialdata insert into test (key, tags) values ('key', {'Running', 
 'onestep4red', 'running'});
 cqlsh:socialdata select * from test;
  key | tags
 -+---
  key | {'Running', 'onestep4red', 'running'}
 (1 rows){code}
 With version 2.1.0:
 {code}grunt data = load 'cql://socialdata/test' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt dump data;
 (key,()){code}
 With version 2.1.2:
 {code}grunt data = load 'cql://socialdata/test' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt dump data;
 org.apache.cassandra.serializers.MarshalException: Unexpected extraneous 
 bytes after set value
   at 
 org.apache.cassandra.serializers.SetSerializer.deserializeForNativeProtocol(SetSerializer.java:94)
   at 
 org.apache.cassandra.serializers.SetSerializer.deserializeForNativeProtocol(SetSerializer.java:27)
   at 
 org.apache.cassandra.hadoop.pig.AbstractCassandraStorage.cassandraToObj(AbstractCassandraStorage.java:796)
   at 
 org.apache.cassandra.hadoop.pig.CqlStorage.cqlColumnToObj(CqlStorage.java:195)
   at 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage.getNext(CqlNativeStorage.java:106)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigRecordReader.nextKeyValue(PigRecordReader.java:211)
   at 
 org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.nextKeyValue(MapTask.java:532)
   at org.apache.hadoop.mapreduce.MapContext.nextKeyValue(MapContext.java:67)
   at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:143)
   at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
   at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)
   at 
 org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:212){code}
 Expected result:
 {code}(key,(Running,onestep4red,running)){code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Comment Edited] (CASSANDRA-8577) Values of set types not loading correctly into Pig

2015-01-14 Thread Alex Liu (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8577?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277952#comment-14277952
 ] 

Alex Liu edited comment on CASSANDRA-8577 at 1/15/15 12:09 AM:
---

duplicate of CASSANDRA-8577


was (Author: alexliu68):
duplicate of CASSANDRA-8577

 Values of set types not loading correctly into Pig
 --

 Key: CASSANDRA-8577
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8577
 Project: Cassandra
  Issue Type: Bug
Reporter: Oksana Danylyshyn
Assignee: Artem Aliev
 Fix For: 2.1.3

 Attachments: cassandra-2.1-8577.txt


 Values of set types are not loading correctly from Cassandra (cql3 table, 
 Native protocol v3) into Pig using CqlNativeStorage. 
 When using Cassandra version 2.1.0 only empty values are loaded, and for 
 newer versions (2.1.1 and 2.1.2) the following error is received: 
 org.apache.cassandra.serializers.MarshalException: Unexpected extraneous 
 bytes after set value
 at 
 org.apache.cassandra.serializers.SetSerializer.deserializeForNativeProtocol(SetSerializer.java:94)
 Steps to reproduce:
 {code}cqlsh:socialdata CREATE TABLE test (
  key varchar PRIMARY KEY,
  tags set<varchar>
);
 cqlsh:socialdata insert into test (key, tags) values ('key', {'Running', 
 'onestep4red', 'running'});
 cqlsh:socialdata select * from test;
  key | tags
 -+---
  key | {'Running', 'onestep4red', 'running'}
 (1 rows){code}
 With version 2.1.0:
 {code}grunt data = load 'cql://socialdata/test' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt dump data;
 (key,()){code}
 With version 2.1.2:
 {code}grunt data = load 'cql://socialdata/test' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt dump data;
 org.apache.cassandra.serializers.MarshalException: Unexpected extraneous 
 bytes after set value
   at 
 org.apache.cassandra.serializers.SetSerializer.deserializeForNativeProtocol(SetSerializer.java:94)
   at 
 org.apache.cassandra.serializers.SetSerializer.deserializeForNativeProtocol(SetSerializer.java:27)
   at 
 org.apache.cassandra.hadoop.pig.AbstractCassandraStorage.cassandraToObj(AbstractCassandraStorage.java:796)
   at 
 org.apache.cassandra.hadoop.pig.CqlStorage.cqlColumnToObj(CqlStorage.java:195)
   at 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage.getNext(CqlNativeStorage.java:106)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigRecordReader.nextKeyValue(PigRecordReader.java:211)
   at 
 org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.nextKeyValue(MapTask.java:532)
   at org.apache.hadoop.mapreduce.MapContext.nextKeyValue(MapContext.java:67)
   at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:143)
   at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
   at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)
   at 
 org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:212){code}
 Expected result:
 {code}(key,(Running,onestep4red,running)){code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-7338) CFS.getRangeSlice should update latency metrics

2015-01-14 Thread Aleksey Yeschenko (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-7338?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277796#comment-14277796
 ] 

Aleksey Yeschenko commented on CASSANDRA-7338:
--

[~cnlwsu] It will be the EstimatedHistogram in 3.0 anyway. Do you need it badly 
in 2.1?

 CFS.getRangeSlice should update latency metrics
 ---

 Key: CASSANDRA-7338
 URL: https://issues.apache.org/jira/browse/CASSANDRA-7338
 Project: Cassandra
  Issue Type: Improvement
Reporter: Sam Tunnicliffe
Assignee: Sam Tunnicliffe
Priority: Trivial
 Fix For: 2.1 rc2

 Attachments: 7338-3.txt, CASSANDRA-7338-v2.txt, CASSANDRA-7338.txt


 CFS.getRangeSlice doesn't update the CF readLatency metric in the same way as 
 CFS.getColumnFamily does. 
 I may be missing something, but I couldn't see a good reason why this wasn't 
 already the case as without it, SELECT * FROM t WHERE x=y results in the 
 read metrics being incremented, but SELECT * FROM t doesn't.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Updated] (CASSANDRA-8576) Primary Key Pushdown For Hadoop

2015-01-14 Thread Alex Liu (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8576?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Alex Liu updated CASSANDRA-8576:

Attachment: 8576-2.1-branch.txt

v1 patch is attached to only support full partition key EQ queries.

 Primary Key Pushdown For Hadoop
 ---

 Key: CASSANDRA-8576
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8576
 Project: Cassandra
  Issue Type: Improvement
  Components: Hadoop
Reporter: Russell Alexander Spitzer
 Attachments: 8576-2.1-branch.txt


 I've heard reports from several users that they would like to have predicate 
 pushdown functionality for hadoop (Hive in particular) based services. 
 Example usecase
 Table with wide partitions, one per customer
 Application team has HQL they would like to run on a single customer
 Currently time to complete scales with number of customers since Input Format 
 can't pushdown primary key predicate
 Current implementation requires a full table scan (since it can't recognize 
 that a single partition was specified)



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Created] (CASSANDRA-8622) All of pig-test is failing in trunk

2015-01-14 Thread Philip Thompson (JIRA)
Philip Thompson created CASSANDRA-8622:
--

 Summary: All of pig-test is failing in trunk
 Key: CASSANDRA-8622
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8622
 Project: Cassandra
  Issue Type: Test
  Components: Hadoop
Reporter: Philip Thompson
Assignee: Brandon Williams
 Fix For: 3.0


See http://cassci.datastax.com/job/trunk_pigtest/330/testReport/
Every test in the ant target {{ant pig-test}} has been failing on trunk for a 
while now.

{code}
java.lang.ExceptionInInitializerError
at org.apache.log4j.Logger.getLogger(Logger.java:40)
at org.hyperic.sigar.SigarLog.getLogger(SigarLog.java:48)
at org.hyperic.sigar.SigarLog.getLogger(SigarLog.java:44)
at org.hyperic.sigar.SigarLog.debug(SigarLog.java:60)
at org.hyperic.sigar.Sigar.<clinit>(Sigar.java:108)
at org.apache.cassandra.utils.SigarLibrary.<init>(SigarLibrary.java:45)
at 
org.apache.cassandra.service.CassandraDaemon.setup(CassandraDaemon.java:206)
at 
org.apache.cassandra.service.CassandraDaemon.<init>(CassandraDaemon.java:408)
at 
org.apache.cassandra.service.EmbeddedCassandraService.start(EmbeddedCassandraService.java:52)
at 
org.apache.cassandra.pig.PigTestBase.startCassandra(PigTestBase.java:96)
at 
org.apache.cassandra.pig.CqlRecordReaderTest.setup(CqlRecordReaderTest.java:63)
at 
org.apache.log4j.Log4jLoggerFactory.<clinit>(Log4jLoggerFactory.java:50)



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Comment Edited] (CASSANDRA-8622) All of pig-test is failing in trunk

2015-01-14 Thread Brandon Williams (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14278036#comment-14278036
 ] 

Brandon Williams edited comment on CASSANDRA-8622 at 1/15/15 1:12 AM:
--

Can you find out when this first started failing? At first glance it looks like 
adding sigar broke it, and pig has parity between 2.1 and trunk.


was (Author: brandon.williams):
Can you find out when this first started failing? At first glance it looks like 
adding sigar broke it.

 All of pig-test is failing in trunk
 ---

 Key: CASSANDRA-8622
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8622
 Project: Cassandra
  Issue Type: Test
  Components: Hadoop
Reporter: Philip Thompson
Assignee: Brandon Williams
 Fix For: 3.0


 See http://cassci.datastax.com/job/trunk_pigtest/330/testReport/
 Every test in the ant target {{ant pig-test}} has been failing on trunk for a 
 while now.
 {code}
 java.lang.ExceptionInInitializerError
   at org.apache.log4j.Logger.getLogger(Logger.java:40)
   at org.hyperic.sigar.SigarLog.getLogger(SigarLog.java:48)
   at org.hyperic.sigar.SigarLog.getLogger(SigarLog.java:44)
   at org.hyperic.sigar.SigarLog.debug(SigarLog.java:60)
   at org.hyperic.sigar.Sigar.clinit(Sigar.java:108)
   at org.apache.cassandra.utils.SigarLibrary.init(SigarLibrary.java:45)
   at 
 org.apache.cassandra.service.CassandraDaemon.setup(CassandraDaemon.java:206)
   at 
 org.apache.cassandra.service.CassandraDaemon.init(CassandraDaemon.java:408)
   at 
 org.apache.cassandra.service.EmbeddedCassandraService.start(EmbeddedCassandraService.java:52)
   at 
 org.apache.cassandra.pig.PigTestBase.startCassandra(PigTestBase.java:96)
   at 
 org.apache.cassandra.pig.CqlRecordReaderTest.setup(CqlRecordReaderTest.java:63)
   at 
 org.apache.log4j.Log4jLoggerFactory.clinit(Log4jLoggerFactory.java:50)
 {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Updated] (CASSANDRA-7653) Add role based access control to Cassandra

2015-01-14 Thread Philip Thompson (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-7653?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Philip Thompson updated CASSANDRA-7653:
---
Tester: Philip Thompson

 Add role based access control to Cassandra
 --

 Key: CASSANDRA-7653
 URL: https://issues.apache.org/jira/browse/CASSANDRA-7653
 Project: Cassandra
  Issue Type: New Feature
  Components: Core
Reporter: Mike Adamson
Assignee: Sam Tunnicliffe
 Fix For: 3.0

 Attachments: 7653.patch, CQLSmokeTest.java, cql_smoke_test.py


 The current authentication model supports granting permissions to individual 
 users. While this is OK for small or medium organizations wanting to 
 implement authorization, it does not work well in large organizations because 
 of the overhead of having to maintain the permissions for each user.
 Introducing roles into the authentication model would allow sets of 
 permissions to be controlled in one place as a role and then the role granted 
 to users. Roles should also be able to be granted to other roles to allow 
 hierarchical sets of permissions to be built up.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8518) Cassandra Query Request Size Estimator

2015-01-14 Thread Benedict (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8518?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277953#comment-14277953
 ] 

Benedict commented on CASSANDRA-8518:
-

This is one of the two methods I proposed, and I'm comfortable aiming for the 
global threshold. Per-request thresholds are also a possibility, and seem 
reasonable also. Whether or not we _throttle_ or simply discard some in-flight 
queries on exceeding our limit is another matter though. I would prefer to go 
the route of discarding some random in-flight queries, as this brings the 
system back to full health immediately, instead of letting it crawl along until 
the blockage clears.

 Cassandra Query Request Size Estimator
 --

 Key: CASSANDRA-8518
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8518
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Cheng Ren

 We have been suffering from cassandra node crash due to out of memory for a 
 long time. The heap dump from the recent crash shows there are 22 native 
 transport request threads each of which consumes 3.3% of heap size, taking 
 more than 70% in total.  
 Heap dump:
 !https://dl-web.dropbox.com/get/attach1.png?_subject_uid=303980955w=AAAVOoncBoZ5aOPbDg2TpRkUss7B-2wlrnhUAv19b27OUA|height=400,width=600!
 Expanded view of one thread:
 !https://dl-web.dropbox.com/get/Screen%20Shot%202014-12-18%20at%204.06.29%20PM.png?_subject_uid=303980955w=AACUO4wrbxheRUxv8fwQ9P52T6gBOm5_g9zeIe8odu3V3w|height=400,width=600!
 The cassandra we are using now (2.0.4) utilized MemoryAwareThreadPoolExecutor 
 as the request executor and provided a default request size estimator which 
 constantly returns 1, meaning it limits only the number of requests being 
 pushed to the pool. To have more fine-grained control on handling requests 
 and better protect our node from OOM issue, we propose implementing a more 
 precise estimator. 
 Here is our two cents:
 For update/delete/insert request: Size could be estimated by adding size of 
 all class members together.
 For scan query, the major part of the request is response, which can be 
 estimated from the history data. For example if we receive a scan query on a 
 column family for a certain token range, we keep track of its response size 
 used as the estimated response size for later scan query on the same cf. 
 For future requests on the same cf, response size could be calculated by 
 token range*recorded size/ recorded token range. The request size should be 
 estimated as (query size + estimated response size).
 We believe what we're proposing here can be useful for other people in the 
 Cassandra community as well. Would you mind providing us feedbacks? Please 
 let us know if you have any concerns or suggestions regarding this proposal.
 Thanks,
 Cheng



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8577) Values of set types not loading correctly into Pig

2015-01-14 Thread Alex Liu (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8577?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277952#comment-14277952
 ] 

Alex Liu commented on CASSANDRA-8577:
-

duplicate of CASSANDRA-8577

 Values of set types not loading correctly into Pig
 --

 Key: CASSANDRA-8577
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8577
 Project: Cassandra
  Issue Type: Bug
Reporter: Oksana Danylyshyn
Assignee: Artem Aliev
 Fix For: 2.1.3

 Attachments: cassandra-2.1-8577.txt


 Values of set types are not loading correctly from Cassandra (cql3 table, 
 Native protocol v3) into Pig using CqlNativeStorage. 
 When using Cassandra version 2.1.0 only empty values are loaded, and for 
 newer versions (2.1.1 and 2.1.2) the following error is received: 
 org.apache.cassandra.serializers.MarshalException: Unexpected extraneous 
 bytes after set value
 at 
 org.apache.cassandra.serializers.SetSerializer.deserializeForNativeProtocol(SetSerializer.java:94)
 Steps to reproduce:
 {code}cqlsh:socialdata CREATE TABLE test (
  key varchar PRIMARY KEY,
  tags set<varchar>
);
 cqlsh:socialdata insert into test (key, tags) values ('key', {'Running', 
 'onestep4red', 'running'});
 cqlsh:socialdata select * from test;
  key | tags
 -+---
  key | {'Running', 'onestep4red', 'running'}
 (1 rows){code}
 With version 2.1.0:
 {code}grunt data = load 'cql://socialdata/test' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt dump data;
 (key,()){code}
 With version 2.1.2:
 {code}grunt data = load 'cql://socialdata/test' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt dump data;
 org.apache.cassandra.serializers.MarshalException: Unexpected extraneous 
 bytes after set value
   at 
 org.apache.cassandra.serializers.SetSerializer.deserializeForNativeProtocol(SetSerializer.java:94)
   at 
 org.apache.cassandra.serializers.SetSerializer.deserializeForNativeProtocol(SetSerializer.java:27)
   at 
 org.apache.cassandra.hadoop.pig.AbstractCassandraStorage.cassandraToObj(AbstractCassandraStorage.java:796)
   at 
 org.apache.cassandra.hadoop.pig.CqlStorage.cqlColumnToObj(CqlStorage.java:195)
   at 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage.getNext(CqlNativeStorage.java:106)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigRecordReader.nextKeyValue(PigRecordReader.java:211)
   at 
 org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.nextKeyValue(MapTask.java:532)
   at org.apache.hadoop.mapreduce.MapContext.nextKeyValue(MapContext.java:67)
   at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:143)
   at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
   at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)
   at 
 org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:212){code}
 Expected result:
 {code}(key,(Running,onestep4red,running)){code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8502) Static columns returning null for pages after first

2015-01-14 Thread Tyler Hobbs (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8502?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277974#comment-14277974
 ] 

Tyler Hobbs commented on CASSANDRA-8502:


I've got a working patch for forward slices, but reversed slices are more 
problematic.  We need to add an extra slice over the statics for every page, 
but with reversed queries, the static slice comes _after_ the normal slice, so 
the page limit prevents the static slice from being used until the last page.  
So instead of adding a second slice to the read, it looks like we'll need to 
split the reversed queries into two separate reads: one for the static columns, 
and one for the normal data.

 Static columns returning null for pages after first
 ---

 Key: CASSANDRA-8502
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8502
 Project: Cassandra
  Issue Type: Bug
  Components: Core
Reporter: Flavien Charlon
Assignee: Tyler Hobbs
 Fix For: 2.1.3, 2.0.13

 Attachments: null-static-column.txt


 When paging is used for a query containing a static column, the first page 
 contains the right value for the static column, but subsequent pages have 
 null for the static column instead of the expected value.
 Repro steps:
 - Create a table with a static column
 - Create a partition with 500 cells
 - Using cqlsh, query that partition
 Actual result:
 - You will see that first, the static column appears as expected, but if you 
 press a key after ---MORE---, the static columns will appear as null.
 See the attached file for a repro of the output.
 I am using a single node cluster.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Updated] (CASSANDRA-8621) For streaming operations, when a socket is closed/reset, we should retry/reinitiate that stream

2015-01-14 Thread Jeremy Hanna (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8621?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Jeremy Hanna updated CASSANDRA-8621:

Description: 
Currently we have a setting (streaming_socket_timeout_in_ms) that will timeout 
and retry the stream operation in the case where tcp is idle for a period of 
time.  However in the case where the socket is closed or reset, we do not retry 
the operation.  This can happen for a number of reasons, including when a 
firewall sends a reset message on a socket during a streaming operation, such 
as nodetool rebuild necessarily across DCs or repairs.

Doing a retry would make the streaming operations more resilient.  It would be 
good to log the retry clearly as well (with the stream session ID and node 
address).

  was:
Currently we have a setting (streaming_socket_timeout_in_ms) that will timeout 
and retry the stream operation in the case where tcp is idle for a period of 
time.  However in the case where the socket is closed or reset, we do not retry 
the operation.  This can happen for a number of reasons, including when a 
firewall sends a reset message on a socket during a streaming operation, such 
as nodetool rebuild necessarily across DCs or repairs.

Doing a retry would make the streaming operations more resilient.  It would be 
good to log the retry clearly as well (with the stream session ID).


 For streaming operations, when a socket is closed/reset, we should 
 retry/reinitiate that stream
 ---

 Key: CASSANDRA-8621
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8621
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Jeremy Hanna
Assignee: Yuki Morishita

 Currently we have a setting (streaming_socket_timeout_in_ms) that will 
 timeout and retry the stream operation in the case where tcp is idle for a 
 period of time.  However in the case where the socket is closed or reset, we 
 do not retry the operation.  This can happen for a number of reasons, 
 including when a firewall sends a reset message on a socket during a 
 streaming operation, such as nodetool rebuild necessarily across DCs or 
 repairs.
 Doing a retry would make the streaming operations more resilient.  It would 
 be good to log the retry clearly as well (with the stream session ID and node 
 address).



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8518) Cassandra Query Request Size Estimator

2015-01-14 Thread Cheng Ren (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8518?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277942#comment-14277942
 ] 

Cheng Ren commented on CASSANDRA-8518:
--

Thanks so much for your helpful reply, we believe your approach which 
coordinates all queries and kills queries if we exceed a limit would be the 
potential solution for us. 
In our understanding, we will have a global size counter keeping track of the 
total size of the current queries being processed. Query serving is a 
multiple-stage process (read-response-repair). Each stage will add to the 
total size. Once total size exceeds the limit we need to throttle future 
queries until it goes below the threshold. We could have this threshold set 
from cassandra yaml file and reset via nodetool in the runtime so that users 
could customize based on their available memory resource and SLA (if the 
threshold is too small, the query latency will increase).
So is our understanding correct? We would like to hear your feedback.

Thanks 


 Cassandra Query Request Size Estimator
 --

 Key: CASSANDRA-8518
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8518
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Cheng Ren

 We have been suffering from cassandra node crash due to out of memory for a 
 long time. The heap dump from the recent crash shows there are 22 native 
 transport request threads each of which consumes 3.3% of heap size, taking 
 more than 70% in total.  
 Heap dump:
 !https://dl-web.dropbox.com/get/attach1.png?_subject_uid=303980955w=AAAVOoncBoZ5aOPbDg2TpRkUss7B-2wlrnhUAv19b27OUA|height=400,width=600!
 Expanded view of one thread:
 !https://dl-web.dropbox.com/get/Screen%20Shot%202014-12-18%20at%204.06.29%20PM.png?_subject_uid=303980955w=AACUO4wrbxheRUxv8fwQ9P52T6gBOm5_g9zeIe8odu3V3w|height=400,width=600!
 The cassandra we are using now (2.0.4) utilized MemoryAwareThreadPoolExecutor 
 as the request executor and provided a default request size estimator which 
 constantly returns 1, meaning it limits only the number of requests being 
 pushed to the pool. To have more fine-grained control on handling requests 
 and better protect our node from OOM issue, we propose implementing a more 
 precise estimator. 
 Here is our two cents:
 For update/delete/insert request: Size could be estimated by adding size of 
 all class members together.
 For scan query, the major part of the request is response, which can be 
 estimated from the history data. For example if we receive a scan query on a 
 column family for a certain token range, we keep track of its response size 
 used as the estimated response size for later scan query on the same cf. 
 For future requests on the same cf, response size could be calculated by 
 token range*recorded size/ recorded token range. The request size should be 
 estimated as (query size + estimated response size).
 We believe what we're proposing here can be useful for other people in the 
 Cassandra community as well. Would you mind providing us feedbacks? Please 
 let us know if you have any concerns or suggestions regarding this proposal.
 Thanks,
 Cheng



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8358) Bundled tools shouldn't be using Thrift API

2015-01-14 Thread Philip Thompson (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8358?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277874#comment-14277874
 ] 

Philip Thompson commented on CASSANDRA-8358:


Progress Update:
1. Completion of work on BulkLoader is blocked by 
https://datastax-oss.atlassian.net/browse/JAVA-312
2. I have an initial draft for both o.a.c.h.cql3.CqlRecordWriter and 
o.a.c.h.cql3.CqlRecordReader. pig-test is completely broken on trunk right now, 
so I haven't had a good opportunity to test them.
3. I am not touching o.a.c.h.ColumnFamily* on [~jjordan]'s recommendation.
4. o.a.c.h.pig.CqlNativeStorage extends CqlStorage which extends 
AbstractCassandraStorage. CassandraStorage also extends 
AbstractCassandraStorage. I will remove thrift from CqlNativeStorage. Should I 
also remove thrift from CqlStorage as well, or just deprecate it? It seems to 
me that I will need to remove the connection between CqlNativeStorage and 
CqlStorage, or CqlStorage and AbstractCassandraStorage in order to remove 
thrift without affecting CassandraStorage. What would be best here? 

 Bundled tools shouldn't be using Thrift API
 ---

 Key: CASSANDRA-8358
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8358
 Project: Cassandra
  Issue Type: Improvement
Reporter: Aleksey Yeschenko
Assignee: Philip Thompson
 Fix For: 3.0


 In 2.1, we switched cqlsh to the python-driver.
 In 3.0, we got rid of cassandra-cli.
 Yet there is still code that's using legacy Thrift API. We want to convert it 
 all to use the java-driver instead.
 1. BulkLoader uses Thrift to query the schema tables. It should be using 
 java-driver metadata APIs directly instead.
 2. o.a.c.hadoop.cql3.CqlRecordWriter is using Thrift
 3. o.a.c.hadoop.ColumnFamilyRecordReader is using Thrift
 4. o.a.c.hadoop.AbstractCassandraStorage is using Thrift
 5. o.a.c.hadoop.pig.CqlStorage is using Thrift
 Some of the things listed above use Thrift to get the list of partition key 
 columns or clustering columns. Those should be converted to use the Metadata 
 API of the java-driver.
 Somewhat related to that, we also have badly ported code from Thrift in 
 o.a.c.hadoop.cql3.CqlRecordReader (see fetchKeys()) that manually fetches 
 columns from schema tables instead of properly using the driver's Metadata 
 API.
 We need all of it fixed. One exception, for now, is 
 o.a.c.hadoop.AbstractColumnFamilyInputFormat - it's using Thrift for its 
 describe_splits_ex() call that cannot be currently replaced by any 
 java-driver call (?).
 Once this is done, we can stop starting Thrift RPC port by default in 
 cassandra.yaml.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


cassandra git commit: rm CqlStorage

2015-01-14 Thread brandonwilliams
Repository: cassandra
Updated Branches:
  refs/heads/trunk 879b694d3 -> e245ba7af


rm CqlStorage


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e245ba7a
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e245ba7a
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e245ba7a

Branch: refs/heads/trunk
Commit: e245ba7af9801696db733fa3f88aa37135b5ae36
Parents: 879b694
Author: Brandon Williams brandonwilli...@apache.org
Authored: Wed Jan 14 17:33:33 2015 -0600
Committer: Brandon Williams brandonwilli...@apache.org
Committed: Wed Jan 14 17:33:33 2015 -0600

--
 NEWS.txt|  1 +
 .../apache/cassandra/hadoop/pig/CqlStorage.java | 41 
 2 files changed, 1 insertion(+), 41 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/e245ba7a/NEWS.txt
--
diff --git a/NEWS.txt b/NEWS.txt
index b9c4173..e37e85f 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -33,6 +33,7 @@ New features
 
 Upgrading
 -
+   - Pig's CqlStorage has been removed, use CqlNativeStorage instead
- IAuthenticator been updated to remove responsibility for user/role
  maintenance and is now solely responsible for validating credentials,
  This is primarily done via SASL, though an optional method exists for

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e245ba7a/src/java/org/apache/cassandra/hadoop/pig/CqlStorage.java
--
diff --git a/src/java/org/apache/cassandra/hadoop/pig/CqlStorage.java 
b/src/java/org/apache/cassandra/hadoop/pig/CqlStorage.java
deleted file mode 100644
index c7277fa..000
--- a/src/java/org/apache/cassandra/hadoop/pig/CqlStorage.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.cassandra.hadoop.pig;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-/**
- * @deprecated use CqlNativeStorage instead. CqlStorage will be removed.
- */
-public class CqlStorage extends CqlNativeStorage
-{
-private static final Logger logger = 
LoggerFactory.getLogger(CqlNativeStorage.class);
-
-public CqlStorage()
-{
-this(1000);
-logger.warn("CqlStorage is deprecated and will be removed in the next 
release, use CqlNativeStorage instead.");
-}
-
-/** @param pageSize limit number of CQL rows to fetch in a thrift request 
*/
-public CqlStorage(int pageSize)
-{
-super(pageSize);
-}
-}
-



[jira] [Commented] (CASSANDRA-8622) All of pig-test is failing in trunk

2015-01-14 Thread Brandon Williams (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14278036#comment-14278036
 ] 

Brandon Williams commented on CASSANDRA-8622:
-

Can you find out when this first started failing? At first glance it looks like 
adding sigar broke it.

 All of pig-test is failing in trunk
 ---

 Key: CASSANDRA-8622
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8622
 Project: Cassandra
  Issue Type: Test
  Components: Hadoop
Reporter: Philip Thompson
Assignee: Brandon Williams
 Fix For: 3.0


 See http://cassci.datastax.com/job/trunk_pigtest/330/testReport/
 Every test in the ant target {{ant pig-test}} has been failing on trunk for a 
 while now.
 {code}
 java.lang.ExceptionInInitializerError
   at org.apache.log4j.Logger.getLogger(Logger.java:40)
   at org.hyperic.sigar.SigarLog.getLogger(SigarLog.java:48)
   at org.hyperic.sigar.SigarLog.getLogger(SigarLog.java:44)
   at org.hyperic.sigar.SigarLog.debug(SigarLog.java:60)
   at org.hyperic.sigar.Sigar.clinit(Sigar.java:108)
   at org.apache.cassandra.utils.SigarLibrary.init(SigarLibrary.java:45)
   at 
 org.apache.cassandra.service.CassandraDaemon.setup(CassandraDaemon.java:206)
   at 
 org.apache.cassandra.service.CassandraDaemon.init(CassandraDaemon.java:408)
   at 
 org.apache.cassandra.service.EmbeddedCassandraService.start(EmbeddedCassandraService.java:52)
   at 
 org.apache.cassandra.pig.PigTestBase.startCassandra(PigTestBase.java:96)
   at 
 org.apache.cassandra.pig.CqlRecordReaderTest.setup(CqlRecordReaderTest.java:63)
   at 
 org.apache.log4j.Log4jLoggerFactory.clinit(Log4jLoggerFactory.java:50)
 {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8358) Bundled tools shouldn't be using Thrift API

2015-01-14 Thread Brandon Williams (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8358?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277886#comment-14277886
 ] 

Brandon Williams commented on CASSANDRA-8358:
-

As for 4, CqlStorage is just a dummy wrapper for CqlNativeStorage after 
CASSANDRA-8599.  I'm going to remove it for 3.0.

 Bundled tools shouldn't be using Thrift API
 ---

 Key: CASSANDRA-8358
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8358
 Project: Cassandra
  Issue Type: Improvement
Reporter: Aleksey Yeschenko
Assignee: Philip Thompson
 Fix For: 3.0


 In 2.1, we switched cqlsh to the python-driver.
 In 3.0, we got rid of cassandra-cli.
 Yet there is still code that's using legacy Thrift API. We want to convert it 
 all to use the java-driver instead.
 1. BulkLoader uses Thrift to query the schema tables. It should be using 
 java-driver metadata APIs directly instead.
 2. o.a.c.hadoop.cql3.CqlRecordWriter is using Thrift
 3. o.a.c.hadoop.ColumnFamilyRecordReader is using Thrift
 4. o.a.c.hadoop.AbstractCassandraStorage is using Thrift
 5. o.a.c.hadoop.pig.CqlStorage is using Thrift
 Some of the things listed above use Thrift to get the list of partition key 
 columns or clustering columns. Those should be converted to use the Metadata 
 API of the java-driver.
 Somewhat related to that, we also have badly ported code from Thrift in 
 o.a.c.hadoop.cql3.CqlRecordReader (see fetchKeys()) that manually fetches 
 columns from schema tables instead of properly using the driver's Metadata 
 API.
 We need all of it fixed. One exception, for now, is 
 o.a.c.hadoop.AbstractColumnFamilyInputFormat - it's using Thrift for its 
 describe_splits_ex() call that cannot be currently replaced by any 
 java-driver call (?).
 Once this is done, we can stop starting Thrift RPC port by default in 
 cassandra.yaml.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8414) Avoid loops over array backed iterators that call iter.remove()

2015-01-14 Thread Richard Low (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8414?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14278204#comment-14278204
 ] 

Richard Low commented on CASSANDRA-8414:


I tested this on some real workload SSTables and got a 2x speedup on force 
compaction! Also the output was the same as before.

Can someone commit the patch?

 Avoid loops over array backed iterators that call iter.remove()
 ---

 Key: CASSANDRA-8414
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8414
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Richard Low
Assignee: Jimmy MÃ¥rdell
  Labels: performance
 Fix For: 2.0.12, 2.1.3

 Attachments: cassandra-2.0-8414-1.txt, cassandra-2.0-8414-2.txt, 
 cassandra-2.0-8414-3.txt, cassandra-2.0-8414-4.txt, cassandra-2.0-8414-5.txt, 
 cassandra-2.1-8414-5.txt, cassandra-2.1-8414-6.txt


 I noticed from sampling that sometimes compaction spends almost all of its 
 time in iter.remove() in ColumnFamilyStore.removeDeletedStandard. It turns 
 out that the cf object is using ArrayBackedSortedColumns, so deletes are from 
 an ArrayList. If the majority of your columns are GCable tombstones then this 
 is O(n^2). The data structure should be changed or a copy made to avoid this.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Created] (CASSANDRA-8624) Cassandra Cluster's Status Inconsistency Strangely

2015-01-14 Thread ZhongYu (JIRA)
ZhongYu created CASSANDRA-8624:
--

 Summary: Cassandra Cluster's Status Inconsistency Strangely
 Key: CASSANDRA-8624
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8624
 Project: Cassandra
  Issue Type: Bug
  Components: Tools
 Environment: Cassandra 1.2.11
Reporter: ZhongYu
Priority: Minor
 Attachments: QQ截图20150115125254.png

We found a strange phenomenon about Cassandra Cluster's status that all the 
nodes in the cluster found other node's status inconsistency. Especially, the 
inconsistency has an interesting patten. See the following example:

There are 5 nodes (pc17, pc19, pc21, pc23, pc25) in the cluster. Their seeds 
configuration are all pc17, pc19, pc21, pc23, pc25. In a moment,
pc17 found others UP;
pc19 found pc17 DN, others UP;
pc21 found pc17, pc19 DN, others UP;
pc23 found pc17, pc19, pc21 DN, others UP;
pc25 found pc17, pc19, pc21, pc23  DN, only self UP;

See attachments as screen's snapshot.




--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Created] (CASSANDRA-8625) LIST USERS and LIST PERMISSIONS command in cqlsh return Keyspace None not found.

2015-01-14 Thread ZhongYu (JIRA)
ZhongYu created CASSANDRA-8625:
--

 Summary: LIST USERS and LIST PERMISSIONS command in cqlsh return 
Keyspace None not found.
 Key: CASSANDRA-8625
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8625
 Project: Cassandra
  Issue Type: Bug
  Components: Tools
 Environment: cqlsh 5.0.1 | Cassandra 2.1.2 | CQL spec 3.2.0 | Native 
protocol v3
Reporter: ZhongYu


When Cassandra authorization and authentication are enabled, the LIST USERS and LIST 
PERMISSIONS commands in cqlsh do not work and always return Keyspace None not 
found.

When I login as super user cassandra and create some users.

cassandra@cqlsh list users;
Keyspace None not found.
cassandra@cqlsh list all permissions;
Keyspace None not found.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Comment Edited] (CASSANDRA-6565) New node refuses to join the ring.

2015-01-14 Thread Vladimir Kuptsov (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-6565?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277020#comment-14277020
 ] 

Vladimir Kuptsov edited comment on CASSANDRA-6565 at 1/14/15 3:08 PM:
--

I have the same issue on 2.0.11 for 2-DC's cluster with 5 nodes in each.

It happens during the empty 5th node join

 WARN [StreamReceiveTask:2] 2015-01-14 18:58:54,725 StreamResultFuture.java 
(line 215) [Stream #4d5f7450-9bfd-11e4-8a10-bd59ad623a02] Stream failed
ERROR [main] 2015-01-14 18:58:54,726 CassandraDaemon.java (line 513) Exception 
encountered during startup
java.lang.RuntimeException: Error during boostrap: Stream failed
at org.apache.cassandra.dht.BootStrapper.bootstrap(BootStrapper.java:86)
at 
org.apache.cassandra.service.StorageService.bootstrap(StorageService.java:998)
at 
org.apache.cassandra.service.StorageService.joinTokenRing(StorageService.java:801)
at 
org.apache.cassandra.service.StorageService.initServer(StorageService.java:614)
at 
org.apache.cassandra.service.StorageService.initServer(StorageService.java:503)
at 
org.apache.cassandra.service.CassandraDaemon.setup(CassandraDaemon.java:378)
at 
org.apache.cassandra.service.CassandraDaemon.activate(CassandraDaemon.java:496)
at 
org.apache.cassandra.service.CassandraDaemon.main(CassandraDaemon.java:585)
Caused by: org.apache.cassandra.streaming.StreamException: Stream failed
at 
org.apache.cassandra.streaming.management.StreamEventJMXNotifier.onFailure(StreamEventJMXNotifier.java:85)
at com.google.common.util.concurrent.Futures$4.run(Futures.java:1160)
at 
com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297)
at 
com.google.common.util.concurrent.ExecutionList.executeListener(ExecutionList.java:156)
at 
com.google.common.util.concurrent.ExecutionList.execute(ExecutionList.java:145)
at 
com.google.common.util.concurrent.AbstractFuture.setException(AbstractFuture.java:202)
at 
org.apache.cassandra.streaming.StreamResultFuture.maybeComplete(StreamResultFuture.java:216)
at 
org.apache.cassandra.streaming.StreamResultFuture.handleSessionComplete(StreamResultFuture.java:191)
at 
org.apache.cassandra.streaming.StreamSession.closeSession(StreamSession.java:377)
at 
org.apache.cassandra.streaming.StreamSession.maybeCompleted(StreamSession.java:662)
at 
org.apache.cassandra.streaming.StreamSession.taskCompleted(StreamSession.java:613)
at 
org.apache.cassandra.streaming.StreamReceiveTask$OnCompletionRunnable.run(StreamReceiveTask.java:137)
at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)



was (Author: vkuptcov):
I have the same issue on 2.0.11
 WARN [StreamReceiveTask:2] 2015-01-14 18:58:54,725 StreamResultFuture.java 
(line 215) [Stream #4d5f7450-9bfd-11e4-8a10-bd59ad623a02] Stream failed
ERROR [main] 2015-01-14 18:58:54,726 CassandraDaemon.java (line 513) Exception 
encountered during startup
java.lang.RuntimeException: Error during boostrap: Stream failed
at org.apache.cassandra.dht.BootStrapper.bootstrap(BootStrapper.java:86)
at 
org.apache.cassandra.service.StorageService.bootstrap(StorageService.java:998)
at 
org.apache.cassandra.service.StorageService.joinTokenRing(StorageService.java:801)
at 
org.apache.cassandra.service.StorageService.initServer(StorageService.java:614)
at 
org.apache.cassandra.service.StorageService.initServer(StorageService.java:503)
at 
org.apache.cassandra.service.CassandraDaemon.setup(CassandraDaemon.java:378)
at 
org.apache.cassandra.service.CassandraDaemon.activate(CassandraDaemon.java:496)
at 
org.apache.cassandra.service.CassandraDaemon.main(CassandraDaemon.java:585)
Caused by: org.apache.cassandra.streaming.StreamException: Stream failed
at 
org.apache.cassandra.streaming.management.StreamEventJMXNotifier.onFailure(StreamEventJMXNotifier.java:85)
at com.google.common.util.concurrent.Futures$4.run(Futures.java:1160)
at 
com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297)
at 
com.google.common.util.concurrent.ExecutionList.executeListener(ExecutionList.java:156)
at 
com.google.common.util.concurrent.ExecutionList.execute(ExecutionList.java:145)
at 
com.google.common.util.concurrent.AbstractFuture.setException(AbstractFuture.java:202)
at 

[jira] [Assigned] (CASSANDRA-7688) Add data sizing to a system table

2015-01-14 Thread Aleksey Yeschenko (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-7688?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Aleksey Yeschenko reassigned CASSANDRA-7688:


Assignee: Aleksey Yeschenko

 Add data sizing to a system table
 -

 Key: CASSANDRA-7688
 URL: https://issues.apache.org/jira/browse/CASSANDRA-7688
 Project: Cassandra
  Issue Type: New Feature
Reporter: Jeremiah Jordan
Assignee: Aleksey Yeschenko
 Fix For: 2.1.3


 Currently you can't implement something similar to describe_splits_ex purely 
 from the a native protocol driver.  
 https://datastax-oss.atlassian.net/browse/JAVA-312 is open to expose easily 
 getting ownership information to a client in the java-driver.  But you still 
 need the data sizing part to get splits of a given size.  We should add the 
 sizing information to a system table so that native clients can get to it.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-6565) New node refuses to join the ring.

2015-01-14 Thread Vladimir Kuptsov (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-6565?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277020#comment-14277020
 ] 

Vladimir Kuptsov commented on CASSANDRA-6565:
-

I have the same issue on 2.0.11
 WARN [StreamReceiveTask:2] 2015-01-14 18:58:54,725 StreamResultFuture.java 
(line 215) [Stream #4d5f7450-9bfd-11e4-8a10-bd59ad623a02] Stream failed
ERROR [main] 2015-01-14 18:58:54,726 CassandraDaemon.java (line 513) Exception 
encountered during startup
java.lang.RuntimeException: Error during boostrap: Stream failed
at org.apache.cassandra.dht.BootStrapper.bootstrap(BootStrapper.java:86)
at 
org.apache.cassandra.service.StorageService.bootstrap(StorageService.java:998)
at 
org.apache.cassandra.service.StorageService.joinTokenRing(StorageService.java:801)
at 
org.apache.cassandra.service.StorageService.initServer(StorageService.java:614)
at 
org.apache.cassandra.service.StorageService.initServer(StorageService.java:503)
at 
org.apache.cassandra.service.CassandraDaemon.setup(CassandraDaemon.java:378)
at 
org.apache.cassandra.service.CassandraDaemon.activate(CassandraDaemon.java:496)
at 
org.apache.cassandra.service.CassandraDaemon.main(CassandraDaemon.java:585)
Caused by: org.apache.cassandra.streaming.StreamException: Stream failed
at 
org.apache.cassandra.streaming.management.StreamEventJMXNotifier.onFailure(StreamEventJMXNotifier.java:85)
at com.google.common.util.concurrent.Futures$4.run(Futures.java:1160)
at 
com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297)
at 
com.google.common.util.concurrent.ExecutionList.executeListener(ExecutionList.java:156)
at 
com.google.common.util.concurrent.ExecutionList.execute(ExecutionList.java:145)
at 
com.google.common.util.concurrent.AbstractFuture.setException(AbstractFuture.java:202)
at 
org.apache.cassandra.streaming.StreamResultFuture.maybeComplete(StreamResultFuture.java:216)
at 
org.apache.cassandra.streaming.StreamResultFuture.handleSessionComplete(StreamResultFuture.java:191)
at 
org.apache.cassandra.streaming.StreamSession.closeSession(StreamSession.java:377)
at 
org.apache.cassandra.streaming.StreamSession.maybeCompleted(StreamSession.java:662)
at 
org.apache.cassandra.streaming.StreamSession.taskCompleted(StreamSession.java:613)
at 
org.apache.cassandra.streaming.StreamReceiveTask$OnCompletionRunnable.run(StreamReceiveTask.java:137)
at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)


 New node refuses to join the ring.
 --

 Key: CASSANDRA-6565
 URL: https://issues.apache.org/jira/browse/CASSANDRA-6565
 Project: Cassandra
  Issue Type: Bug
Reporter: Shao-Chuan Wang

 We have 30 nodes in one DC, 25 nodes in another. We are running 2.0.1.
 Two nodes are joining the ring, but one of them failed
 ARN [STREAM-IN-/10.4.197.53] 2014-01-09 19:41:40,418 StreamResultFuture.java 
 (line 209) [Stream #e515d6e0-795d-11e3-b74a-b72892248056] Stream failed
 ERROR [main] 2014-01-09 19:41:40,418 CassandraDaemon.java (line 459) 
 Exception encountered during startup
 java.lang.RuntimeException: Error during boostrap: Stream failed
 at 
 org.apache.cassandra.dht.BootStrapper.bootstrap(BootStrapper.java:86)
 at 
 org.apache.cassandra.service.StorageService.bootstrap(StorageService.java:901)
 at 
 org.apache.cassandra.service.StorageService.joinTokenRing(StorageService.java:670)
 at 
 org.apache.cassandra.service.StorageService.initServer(StorageService.java:529)
 at 
 org.apache.cassandra.service.StorageService.initServer(StorageService.java:428)
 at 
 org.apache.cassandra.service.CassandraDaemon.setup(CassandraDaemon.java:343)
 at 
 org.apache.cassandra.service.CassandraDaemon.activate(CassandraDaemon.java:442)
 at 
 org.apache.cassandra.service.CassandraDaemon.main(CassandraDaemon.java:485)
 Caused by: org.apache.cassandra.streaming.StreamException: Stream failed
 at 
 org.apache.cassandra.streaming.StreamResultFuture.maybeComplete(StreamResultFuture.java:210)
 at 
 org.apache.cassandra.streaming.StreamResultFuture.handleSessionComplete(StreamResultFuture.java:185)
 at 
 org.apache.cassandra.streaming.StreamSession.closeSession(StreamSession.java:321)
 at 
 org.apache.cassandra.streaming.StreamSession.complete(StreamSession.java:501) 
at 

[jira] [Reopened] (CASSANDRA-8292) From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in variable: [cassandra.config]. Please prefix the file with file:/// for local file

2015-01-14 Thread Brandon Kearby (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8292?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Brandon Kearby reopened CASSANDRA-8292:
---
Since Version: 2.1.2
   Tester: Brandon Kearby

Hey Guys, we have this same issue again. It's showing up in a different place 
in the pig client code. I've attached a diff of where I removed them.

 From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting 
 URI in variable: [cassandra.config].  Please prefix the file with file:/// 
 for local files or file://server/ for remote files.
 

 Key: CASSANDRA-8292
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8292
 Project: Cassandra
  Issue Type: Bug
Reporter: Brandon Kearby
Assignee: Joshua McKenzie
 Fix For: 2.1.3

 Attachments: 8292_v1.txt


 Getting this error from Pig:
 Looks like the client side hadoop code is trying to locate the cassandra.yaml.
 {code}
 ERROR org.apache.cassandra.config.DatabaseDescriptor - Fatal configuration 
 error
 org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in 
 variable: [cassandra.config].  Please prefix the file with file:/// for local 
 files or file://server/ for remote files.  Aborting.
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.getStorageConfigURL(YamlConfigurationLoader.java:73)
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.loadConfig(YamlConfigurationLoader.java:84)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.loadConfig(DatabaseDescriptor.java:158)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.clinit(DatabaseDescriptor.java:133)
   at 
 org.apache.cassandra.utils.JVMStabilityInspector.inspectThrowable(JVMStabilityInspector.java:54)
   at 
 org.apache.cassandra.hadoop.HadoopCompat.clinit(HadoopCompat.java:135)
   at 
 org.apache.cassandra.hadoop.AbstractColumnFamilyInputFormat.getSplits(AbstractColumnFamilyInputFormat.java:120)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.getSplits(PigInputFormat.java:273)
   at 
 org.apache.hadoop.mapred.JobClient.writeNewSplits(JobClient.java:1014)
   at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1031)
   at org.apache.hadoop.mapred.JobClient.access$600(JobClient.java:172)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:943)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:896)
   at java.security.AccessController.doPrivileged(Native Method)
   at javax.security.auth.Subject.doAs(Subject.java:422)
   at 
 org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
   at 
 org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:896)
   at org.apache.hadoop.mapreduce.Job.submit(Job.java:531)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.submit(ControlledJob.java:318)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.startReadyJobs(JobControl.java:238)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.run(JobControl.java:269)
   at java.lang.Thread.run(Thread.java:745)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher$1.run(MapReduceLauncher.java:260)
 Expecting URI in variable: [cassandra.config].  Please prefix the file with 
 file:/// for local files or file://server/ for remote files.  Aborting.
 Fatal configuration error; unable to start. See log for stacktrace.
 {code}
 Sample Pig Script:
 {code}
 grunt sigs = load 'cql://socialdata/signal' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt a = limit sigs 5;  
 
 grunt dump a;
 {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8580) AssertionErrors after activating unchecked_tombstone_compaction with leveled compaction

2015-01-14 Thread Benedict (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8580?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277033#comment-14277033
 ] 

Benedict commented on CASSANDRA-8580:
-

I've gone a little buzz-eyed looking at this today, so I'll take another look 
in a couple of days. There are a number of weird things going on, but the 
weirdest is that the file we are failing on _has already been compacted_. This 
should really have resulted in an assertion error much earlier on. There are a 
few things we can do in this area at least to reduce the likelihood of problems 
here, and to improve logging, but the interleavings of events that causes this 
seem likely to be convoluted, and may require these steps first in order to 
help narrow the state space we need to search in the code.

 AssertionErrors after activating unchecked_tombstone_compaction with leveled 
 compaction
 ---

 Key: CASSANDRA-8580
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8580
 Project: Cassandra
  Issue Type: Bug
Reporter: Björn Hachmann
Assignee: Benedict
 Fix For: 2.1.3

 Attachments: system.log


 During our upgrade of Cassandra from version 2.0.7 to 2.1.2 we experienced a 
 serious problem regarding the setting unchecked_tombstone_compaction in 
 combination with leveled compaction strategy.
 In order to prevent tombstone-threshold-warnings we activated the setting for 
 a specific table after the upgrade. Some time after that we observed new 
 errors in our log files:
 {code}
 INFO  [CompactionExecutor:184] 2014-12-11 12:36:06,597 
 CompactionTask.java:136 - Compacting 
 [SSTableReader(path='/data/cassandra/data/system/compactions_in_progress/system-compactions_in_progress-ka-1848-Data.db'),
  SSTableReader(path='/
 data/cassandra/data/system/compactions_in_progress/system-compactions_in_progress-ka-1847-Data.db'),
  
 SSTableReader(path='/data/cassandra/data/system/compactions_in_progress/system-compactions_in_progress-ka-1845-Data.db'),
  SSTableReader
 (path='/data/cassandra/data/system/compactions_in_progress/system-compactions_in_progress-ka-1846-Data.db')]
 ERROR [CompactionExecutor:183] 2014-12-11 12:36:06,613 
 CassandraDaemon.java:153 - Exception in thread 
 Thread[CompactionExecutor:183,1,main]
 java.lang.AssertionError: 
 /data/cassandra/data/metrigo_prod/new_user_data/metrigo_prod-new_user_data-tmplink-ka-705732-Data.db
 at 
 org.apache.cassandra.io.sstable.SSTableReader.getApproximateKeyCount(SSTableReader.java:243)
  ~[apache-cassandra-2.1.2.jar:2.1.2]
 at 
 org.apache.cassandra.db.compaction.CompactionTask.runWith(CompactionTask.java:146)
  ~[apache-cassandra-2.1.2.jar:2.1.2]
 at 
 org.apache.cassandra.io.util.DiskAwareRunnable.runMayThrow(DiskAwareRunnable.java:48)
  ~[apache-cassandra-2.1.2.jar:2.1.2]
 at 
 org.apache.cassandra.utils.WrappedRunnable.run(WrappedRunnable.java:28) 
 ~[apache-cassandra-2.1.2.jar:2.1.2]
 at 
 org.apache.cassandra.db.compaction.CompactionTask.executeInternal(CompactionTask.java:75)
  ~[apache-cassandra-2.1.2.jar:2.1.2]
 at 
 org.apache.cassandra.db.compaction.AbstractCompactionTask.execute(AbstractCompactionTask.java:59)
  ~[apache-cassandra-2.1.2.jar:2.1.2]
 at 
 org.apache.cassandra.db.compaction.CompactionManager$BackgroundCompactionTask.run(CompactionManager.java:232)
  ~[apache-cassandra-2.1.2.jar:2.1.2]
 at 
 java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) 
 ~[na:1.7.0_45]
 at java.util.concurrent.FutureTask.run(FutureTask.java:262) 
 ~[na:1.7.0_45]
 at 
 java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
  ~[na:1.7.0_45]
 at 
 java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
  [na:1.7.0_45]
 at java.lang.Thread.run(Thread.java:744) [na:1.7.0_45]
 {code}
 Obviously that error aborted the compaction and after some time the number of 
 pending compactions became very high on every node. Of course, this in turn 
 had a negative impact on several other metrics.
 After reverting the setting we had to restart all nodes. After that 
 compactions could finish again and the pending compactions could be worked 
 off.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Updated] (CASSANDRA-8292) From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in variable: [cassandra.config]. Please prefix the file with file:/// for local files

2015-01-14 Thread Brandon Kearby (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8292?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Brandon Kearby updated CASSANDRA-8292:
--
Attachment: patch.txt

Diff where offending code was removed.

 From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting 
 URI in variable: [cassandra.config].  Please prefix the file with file:/// 
 for local files or file://server/ for remote files.
 

 Key: CASSANDRA-8292
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8292
 Project: Cassandra
  Issue Type: Bug
Reporter: Brandon Kearby
Assignee: Joshua McKenzie
 Fix For: 2.1.3

 Attachments: 8292_v1.txt, patch.txt


 Getting this error from Pig:
 Looks like the client side hadoop code is trying to locate the cassandra.yaml.
 {code}
 ERROR org.apache.cassandra.config.DatabaseDescriptor - Fatal configuration 
 error
 org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in 
 variable: [cassandra.config].  Please prefix the file with file:/// for local 
 files or file://server/ for remote files.  Aborting.
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.getStorageConfigURL(YamlConfigurationLoader.java:73)
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.loadConfig(YamlConfigurationLoader.java:84)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.loadConfig(DatabaseDescriptor.java:158)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.clinit(DatabaseDescriptor.java:133)
   at 
 org.apache.cassandra.utils.JVMStabilityInspector.inspectThrowable(JVMStabilityInspector.java:54)
   at 
 org.apache.cassandra.hadoop.HadoopCompat.clinit(HadoopCompat.java:135)
   at 
 org.apache.cassandra.hadoop.AbstractColumnFamilyInputFormat.getSplits(AbstractColumnFamilyInputFormat.java:120)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.getSplits(PigInputFormat.java:273)
   at 
 org.apache.hadoop.mapred.JobClient.writeNewSplits(JobClient.java:1014)
   at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1031)
   at org.apache.hadoop.mapred.JobClient.access$600(JobClient.java:172)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:943)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:896)
   at java.security.AccessController.doPrivileged(Native Method)
   at javax.security.auth.Subject.doAs(Subject.java:422)
   at 
 org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
   at 
 org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:896)
   at org.apache.hadoop.mapreduce.Job.submit(Job.java:531)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.submit(ControlledJob.java:318)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.startReadyJobs(JobControl.java:238)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.run(JobControl.java:269)
   at java.lang.Thread.run(Thread.java:745)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher$1.run(MapReduceLauncher.java:260)
 Expecting URI in variable: [cassandra.config].  Please prefix the file with 
 file:/// for local files or file://server/ for remote files.  Aborting.
 Fatal configuration error; unable to start. See log for stacktrace.
 {code}
 Sample Pig Script:
 {code}
 grunt sigs = load 'cql://socialdata/signal' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt a = limit sigs 5;  
 
 grunt dump a;
 {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8548) Nodetool Cleanup - java.lang.AssertionError

2015-01-14 Thread Andrei Ivanov (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8548?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277082#comment-14277082
 ] 

Andrei Ivanov commented on CASSANDRA-8548:
--

Ahhh, that's really weird. It's probably a bug in inserting our data. Any way 
to solve this? We are totally ok with deleting this data. I'm just worried about 
screwing everything up.

 Nodetool Cleanup - java.lang.AssertionError
 ---

 Key: CASSANDRA-8548
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8548
 Project: Cassandra
  Issue Type: Bug
Reporter: Sebastian Estevez
Assignee: Marcus Eriksson
 Fix For: 2.0.12

 Attachments: 0001-make-sure-we-unmark-compacting.patch


 Needed to free up some space on a node but getting the dump below when 
 running nodetool cleanup.
 Tried turning on debug to try to obtain additional details in the logs but 
 nothing gets added to the logs when running cleanup. Added: 
 log4j.logger.org.apache.cassandra.db=DEBUG 
 in log4j-server.properties
 See the stack trace below:
 root@cassandra-019:~# nodetool cleanup
 {code}Error occurred during cleanup
 java.util.concurrent.ExecutionException: java.lang.IllegalArgumentException
 at java.util.concurrent.FutureTask.report(FutureTask.java:122)
 at java.util.concurrent.FutureTask.get(FutureTask.java:188)
 at 
 org.apache.cassandra.db.compaction.CompactionManager.performAllSSTableOperation(CompactionManager.java:228)
 at 
 org.apache.cassandra.db.compaction.CompactionManager.performCleanup(CompactionManager.java:266)
 at 
 org.apache.cassandra.db.ColumnFamilyStore.forceCleanup(ColumnFamilyStore.java:1112)
 at 
 org.apache.cassandra.service.StorageService.forceKeyspaceCleanup(StorageService.java:2162)
 at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 at 
 sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
 at 
 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 at java.lang.reflect.Method.invoke(Method.java:606)
 at sun.reflect.misc.Trampoline.invoke(MethodUtil.java:75)
 at sun.reflect.GeneratedMethodAccessor17.invoke(Unknown Source)
 at 
 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 at java.lang.reflect.Method.invoke(Method.java:606)
 at sun.reflect.misc.MethodUtil.invoke(MethodUtil.java:279)
 at 
 com.sun.jmx.mbeanserver.StandardMBeanIntrospector.invokeM2(StandardMBeanIntrospector.java:112)
 at 
 com.sun.jmx.mbeanserver.StandardMBeanIntrospector.invokeM2(StandardMBeanIntrospector.java:46)
 at 
 com.sun.jmx.mbeanserver.MBeanIntrospector.invokeM(MBeanIntrospector.java:237)
 at com.sun.jmx.mbeanserver.PerInterface.invoke(PerInterface.java:138)
 at com.sun.jmx.mbeanserver.MBeanSupport.invoke(MBeanSupport.java:252)
 at 
 com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.invoke(DefaultMBeanServerInterceptor.java:819)
 at 
 com.sun.jmx.mbeanserver.JmxMBeanServer.invoke(JmxMBeanServer.java:801)
 at 
 javax.management.remote.rmi.RMIConnectionImpl.doOperation(RMIConnectionImpl.java:1487)
 at 
 javax.management.remote.rmi.RMIConnectionImpl.access$300(RMIConnectionImpl.java:97)
 at 
 javax.management.remote.rmi.RMIConnectionImpl$PrivilegedOperation.run(RMIConnectionImpl.java:1328)
 at 
 javax.management.remote.rmi.RMIConnectionImpl.doPrivilegedOperation(RMIConnectionImpl.java:1420)
 at 
 javax.management.remote.rmi.RMIConnectionImpl.invoke(RMIConnectionImpl.java:848)
 at sun.reflect.GeneratedMethodAccessor64.invoke(Unknown Source)
 at 
 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 at java.lang.reflect.Method.invoke(Method.java:606)
 at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:322)
 at sun.rmi.transport.Transport$1.run(Transport.java:177)
 at sun.rmi.transport.Transport$1.run(Transport.java:174)
 at java.security.AccessController.doPrivileged(Native Method)
 at sun.rmi.transport.Transport.serviceCall(Transport.java:173)
 at 
 sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:556)
 at 
 sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:811)
 at 
 sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:670)
 at 
 java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
 at 
 java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
 at java.lang.Thread.run(Thread.java:744)
 Caused by: java.lang.IllegalArgumentException
 at 

[jira] [Comment Edited] (CASSANDRA-8548) Nodetool Cleanup - java.lang.AssertionError

2015-01-14 Thread Andrei Ivanov (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8548?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277082#comment-14277082
 ] 

Andrei Ivanov edited comment on CASSANDRA-8548 at 1/14/15 3:40 PM:
---

Ahhh, that's really weird. It's probably a bug in inserting our data. Any way 
to solve this? We are totally ok with deleting this data. I'm just worried to 
screw everything up.


was (Author: aivanov93):
Ahhh, that's really weird. It's probably a bug in inserting our data. Any way 
to solve this. We are totally ok with deleting this data. I'm just worried to 
screw everything up.

 Nodetool Cleanup - java.lang.AssertionError
 ---

 Key: CASSANDRA-8548
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8548
 Project: Cassandra
  Issue Type: Bug
Reporter: Sebastian Estevez
Assignee: Marcus Eriksson
 Fix For: 2.0.12

 Attachments: 0001-make-sure-we-unmark-compacting.patch


 Needed to free up some space on a node but getting the dump below when 
 running nodetool cleanup.
 Tried turning on debug to try to obtain additional details in the logs but 
 nothing gets added to the logs when running cleanup. Added: 
 log4j.logger.org.apache.cassandra.db=DEBUG 
 in log4j-server.properties
 See the stack trace below:
 root@cassandra-019:~# nodetool cleanup
 {code}Error occurred during cleanup
 java.util.concurrent.ExecutionException: java.lang.IllegalArgumentException
 at java.util.concurrent.FutureTask.report(FutureTask.java:122)
 at java.util.concurrent.FutureTask.get(FutureTask.java:188)
 at 
 org.apache.cassandra.db.compaction.CompactionManager.performAllSSTableOperation(CompactionManager.java:228)
 at 
 org.apache.cassandra.db.compaction.CompactionManager.performCleanup(CompactionManager.java:266)
 at 
 org.apache.cassandra.db.ColumnFamilyStore.forceCleanup(ColumnFamilyStore.java:1112)
 at 
 org.apache.cassandra.service.StorageService.forceKeyspaceCleanup(StorageService.java:2162)
 at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 at 
 sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
 at 
 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 at java.lang.reflect.Method.invoke(Method.java:606)
 at sun.reflect.misc.Trampoline.invoke(MethodUtil.java:75)
 at sun.reflect.GeneratedMethodAccessor17.invoke(Unknown Source)
 at 
 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 at java.lang.reflect.Method.invoke(Method.java:606)
 at sun.reflect.misc.MethodUtil.invoke(MethodUtil.java:279)
 at 
 com.sun.jmx.mbeanserver.StandardMBeanIntrospector.invokeM2(StandardMBeanIntrospector.java:112)
 at 
 com.sun.jmx.mbeanserver.StandardMBeanIntrospector.invokeM2(StandardMBeanIntrospector.java:46)
 at 
 com.sun.jmx.mbeanserver.MBeanIntrospector.invokeM(MBeanIntrospector.java:237)
 at com.sun.jmx.mbeanserver.PerInterface.invoke(PerInterface.java:138)
 at com.sun.jmx.mbeanserver.MBeanSupport.invoke(MBeanSupport.java:252)
 at 
 com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.invoke(DefaultMBeanServerInterceptor.java:819)
 at 
 com.sun.jmx.mbeanserver.JmxMBeanServer.invoke(JmxMBeanServer.java:801)
 at 
 javax.management.remote.rmi.RMIConnectionImpl.doOperation(RMIConnectionImpl.java:1487)
 at 
 javax.management.remote.rmi.RMIConnectionImpl.access$300(RMIConnectionImpl.java:97)
 at 
 javax.management.remote.rmi.RMIConnectionImpl$PrivilegedOperation.run(RMIConnectionImpl.java:1328)
 at 
 javax.management.remote.rmi.RMIConnectionImpl.doPrivilegedOperation(RMIConnectionImpl.java:1420)
 at 
 javax.management.remote.rmi.RMIConnectionImpl.invoke(RMIConnectionImpl.java:848)
 at sun.reflect.GeneratedMethodAccessor64.invoke(Unknown Source)
 at 
 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 at java.lang.reflect.Method.invoke(Method.java:606)
 at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:322)
 at sun.rmi.transport.Transport$1.run(Transport.java:177)
 at sun.rmi.transport.Transport$1.run(Transport.java:174)
 at java.security.AccessController.doPrivileged(Native Method)
 at sun.rmi.transport.Transport.serviceCall(Transport.java:173)
 at 
 sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:556)
 at 
 sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:811)
 at 
 sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:670)
 at 
 

[jira] [Issue Comment Deleted] (CASSANDRA-8292) From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in variable: [cassandra.config]. Please prefix the file with file:/// f

2015-01-14 Thread Brandon Kearby (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8292?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Brandon Kearby updated CASSANDRA-8292:
--
Comment: was deleted

(was: Attached diff)

 From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting 
 URI in variable: [cassandra.config].  Please prefix the file with file:/// 
 for local files or file://server/ for remote files.
 

 Key: CASSANDRA-8292
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8292
 Project: Cassandra
  Issue Type: Bug
Reporter: Brandon Kearby
Assignee: Joshua McKenzie
 Fix For: 2.1.3

 Attachments: 8292_v1.txt, patch.txt


 Getting this error from Pig:
 Looks like the client side hadoop code is trying to locate the cassandra.yaml.
 {code}
 ERROR org.apache.cassandra.config.DatabaseDescriptor - Fatal configuration 
 error
 org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in 
 variable: [cassandra.config].  Please prefix the file with file:/// for local 
 files or file://server/ for remote files.  Aborting.
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.getStorageConfigURL(YamlConfigurationLoader.java:73)
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.loadConfig(YamlConfigurationLoader.java:84)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.loadConfig(DatabaseDescriptor.java:158)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.clinit(DatabaseDescriptor.java:133)
   at 
 org.apache.cassandra.utils.JVMStabilityInspector.inspectThrowable(JVMStabilityInspector.java:54)
   at 
 org.apache.cassandra.hadoop.HadoopCompat.clinit(HadoopCompat.java:135)
   at 
 org.apache.cassandra.hadoop.AbstractColumnFamilyInputFormat.getSplits(AbstractColumnFamilyInputFormat.java:120)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.getSplits(PigInputFormat.java:273)
   at 
 org.apache.hadoop.mapred.JobClient.writeNewSplits(JobClient.java:1014)
   at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1031)
   at org.apache.hadoop.mapred.JobClient.access$600(JobClient.java:172)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:943)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:896)
   at java.security.AccessController.doPrivileged(Native Method)
   at javax.security.auth.Subject.doAs(Subject.java:422)
   at 
 org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
   at 
 org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:896)
   at org.apache.hadoop.mapreduce.Job.submit(Job.java:531)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.submit(ControlledJob.java:318)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.startReadyJobs(JobControl.java:238)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.run(JobControl.java:269)
   at java.lang.Thread.run(Thread.java:745)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher$1.run(MapReduceLauncher.java:260)
 Expecting URI in variable: [cassandra.config].  Please prefix the file with 
 file:/// for local files or file://server/ for remote files.  Aborting.
 Fatal configuration error; unable to start. See log for stacktrace.
 {code}
 Sample Pig Script:
 {code}
 grunt> sigs = load 'cql://socialdata/signal' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt> a = limit sigs 5;  
 
 grunt> dump a;
 {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Issue Comment Deleted] (CASSANDRA-8292) From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in variable: [cassandra.config]. Please prefix the file with file:/// f

2015-01-14 Thread Brandon Kearby (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8292?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Brandon Kearby updated CASSANDRA-8292:
--
Comment: was deleted

(was: Diff where offending code was removed.)

 From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting 
 URI in variable: [cassandra.config].  Please prefix the file with file:/// 
 for local files or file://server/ for remote files.
 

 Key: CASSANDRA-8292
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8292
 Project: Cassandra
  Issue Type: Bug
Reporter: Brandon Kearby
Assignee: Joshua McKenzie
 Fix For: 2.1.3

 Attachments: 8292_v1.txt, patch.txt


 Getting this error from Pig:
 Looks like the client side hadoop code is trying to locate the cassandra.yaml.
 {code}
 ERROR org.apache.cassandra.config.DatabaseDescriptor - Fatal configuration 
 error
 org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in 
 variable: [cassandra.config].  Please prefix the file with file:/// for local 
 files or file://server/ for remote files.  Aborting.
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.getStorageConfigURL(YamlConfigurationLoader.java:73)
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.loadConfig(YamlConfigurationLoader.java:84)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.loadConfig(DatabaseDescriptor.java:158)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.clinit(DatabaseDescriptor.java:133)
   at 
 org.apache.cassandra.utils.JVMStabilityInspector.inspectThrowable(JVMStabilityInspector.java:54)
   at 
 org.apache.cassandra.hadoop.HadoopCompat.clinit(HadoopCompat.java:135)
   at 
 org.apache.cassandra.hadoop.AbstractColumnFamilyInputFormat.getSplits(AbstractColumnFamilyInputFormat.java:120)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.getSplits(PigInputFormat.java:273)
   at 
 org.apache.hadoop.mapred.JobClient.writeNewSplits(JobClient.java:1014)
   at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1031)
   at org.apache.hadoop.mapred.JobClient.access$600(JobClient.java:172)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:943)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:896)
   at java.security.AccessController.doPrivileged(Native Method)
   at javax.security.auth.Subject.doAs(Subject.java:422)
   at 
 org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
   at 
 org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:896)
   at org.apache.hadoop.mapreduce.Job.submit(Job.java:531)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.submit(ControlledJob.java:318)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.startReadyJobs(JobControl.java:238)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.run(JobControl.java:269)
   at java.lang.Thread.run(Thread.java:745)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher$1.run(MapReduceLauncher.java:260)
 Expecting URI in variable: [cassandra.config].  Please prefix the file with 
 file:/// for local files or file://server/ for remote files.  Aborting.
 Fatal configuration error; unable to start. See log for stacktrace.
 {code}
 Sample Pig Script:
 {code}
 grunt> sigs = load 'cql://socialdata/signal' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt> a = limit sigs 5;  
 
 grunt> dump a;
 {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Updated] (CASSANDRA-8623) sstablesplit fails *randomly* with Data component is missing

2015-01-14 Thread Alan Boudreault (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8623?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Alan Boudreault updated CASSANDRA-8623:
---
Description: 
I'm experiencing an issue related to sstablesplit. I would like to understand 
if I am doing something wrong or there is an issue in the split process. The 
process fails randomly with the following exception:
{code}
ERROR 02:17:36 Error in ThreadPoolExecutor
java.lang.AssertionError: Data component is missing for 
sstable./tools/bin/../../data/data/system/compactions_in_progress-55080ab05d9c388690a4acb25fe1f77b/system-compactions_in_progress-ka-16
{code}

See attached output.log file. The process never stops after this exception and 
I've also seen the dataset growing indefinitely (number of sstables).  

* I have not been able to reproduce the issue with a single sstablesplit 
command. ie, specifying all files with glob matching.
* I can reproduce the bug if I call multiple sstablesplit one file at the time 
(the way ccm does)

Here is the test case file to reproduce the bug:

https://drive.google.com/file/d/0BwZ_GPM33j6KdVh0NTdkOWV2R1E/view?usp=sharing

1. Download the split_issue.tar.gz file. It includes latest cassandra-2.1 
branch binaries.
2. Extract it
3. CD inside the use case directory
4. Download the dataset (2G) just to be sure we have the same thing, and place 
it in the working directory.
   https://docs.google.com/uc?id=0BwZ_GPM33j6KV3ViNnpPcVFndUU&export=download
5. The first time, run ./test.sh. This will setup and run a test.
6. The next times, you can only run ./test --no-setup . This will only reset 
the dataset as its initial state and re-run the test. You might have to run the 
tests some times before experiencing it... but I'm always able with only 2-3 
runs.


  was:
I'm experiencing an issue related to sstablesplit. I would like to understand 
if I am doing something wrong or there is an issue in the split process. The 
process fails randomly with the following exception:
{code}
ERROR 02:17:36 Error in ThreadPoolExecutor
java.lang.AssertionError: Data component is missing for 
sstable./tools/bin/../../data/data/system/compactions_in_progress-55080ab05d9c388690a4acb25fe1f77b/system-compactions_in_progress-ka-16
{code}

See attached output.log file. The process never stops after this exception and 
I've also seen the dataset growing indefinitely (number of sstables).  

* I have not been able to reproduce the issue with a single sstablesplit 
command. ie, specifying all files with glob matching.
* I can reproduce the bug if I call multiple sstablesplit on a single file (the 
way ccm does)

Here is the test case file to reproduce the bug:

https://drive.google.com/file/d/0BwZ_GPM33j6KdVh0NTdkOWV2R1E/view?usp=sharing

1. Download the split_issue.tar.gz file. It includes latest cassandra-2.1 
branch binaries.
2. Extract it
3. CD inside the use case directory
4. Download the dataset (2G) just to be sure we have the same thing, and place 
it in the working directory.
   https://docs.google.com/uc?id=0BwZ_GPM33j6KV3ViNnpPcVFndUU&export=download
5. The first time, run ./test.sh. This will setup and run a test.
6. The next times, you can only run ./test --no-setup . This will only reset 
the dataset as its initial state and re-run the test. You might have to run the 
tests some times before experiencing it... but I'm always able with only 2-3 
runs.



 sstablesplit fails *randomly* with Data component is missing
 

 Key: CASSANDRA-8623
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8623
 Project: Cassandra
  Issue Type: Bug
Reporter: Alan Boudreault
Assignee: Marcus Eriksson
 Attachments: output.log


 I'm experiencing an issue related to sstablesplit. I would like to understand 
 if I am doing something wrong or there is an issue in the split process. The 
 process fails randomly with the following exception:
 {code}
 ERROR 02:17:36 Error in ThreadPoolExecutor
 java.lang.AssertionError: Data component is missing for 
 sstable./tools/bin/../../data/data/system/compactions_in_progress-55080ab05d9c388690a4acb25fe1f77b/system-compactions_in_progress-ka-16
 {code}
 See attached output.log file. The process never stops after this exception 
 and I've also seen the dataset growing indefinitely (number of sstables).  
 * I have not been able to reproduce the issue with a single sstablesplit 
 command. ie, specifying all files with glob matching.
 * I can reproduce the bug if I call multiple sstablesplit one file at the 
 time (the way ccm does)
 Here is the test case file to reproduce the bug:
 https://drive.google.com/file/d/0BwZ_GPM33j6KdVh0NTdkOWV2R1E/view?usp=sharing
 1. Download the split_issue.tar.gz file. It includes latest cassandra-2.1 
 branch binaries.
 2. Extract it
 3. CD inside the use case directory
 4. 

[jira] [Updated] (CASSANDRA-8623) sstablesplit fails *randomly* with Data component is missing

2015-01-14 Thread Alan Boudreault (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8623?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Alan Boudreault updated CASSANDRA-8623:
---
Description: 
I'm experiencing an issue related to sstablesplit. I would like to understand 
if I am doing something wrong or there is an issue in the split process. The 
process fails randomly with the following exception:
{code}
ERROR 02:17:36 Error in ThreadPoolExecutor
java.lang.AssertionError: Data component is missing for 
sstable./tools/bin/../../data/data/system/compactions_in_progress-55080ab05d9c388690a4acb25fe1f77b/system-compactions_in_progress-ka-16
{code}

See attached output.log file. The process never stops after this exception and 
I've also seen the dataset growing indefinitely (number of sstables).  

* I have not been able to reproduce the issue with a single sstablesplit 
command. ie, specifying all files with glob matching.
* I can reproduce the bug if I call multiple sstablesplit on a single file (the way 
ccm does)

Here is the test case file to reproduce the bug:

https://drive.google.com/file/d/0BwZ_GPM33j6KdVh0NTdkOWV2R1E/view?usp=sharing

1. Download the split_issue.tar.gz file. It includes latest cassandra-2.1 
branch binaries.
2. Extract it
3. CD inside the use case directory
4. Download the dataset (2G) just to be sure we have the same thing, and place 
it in the working directory.
   https://docs.google.com/uc?id=0BwZ_GPM33j6KV3ViNnpPcVFndUU&export=download
5. The first time, run ./test.sh. This will setup and run a test.
6. The next times, you can only run ./test --no-setup . This will only reset 
the dataset as its initial state and re-run the test. You might have to run the 
tests some times before experiencing it... but I'm always able with only 2-3 
runs.


  was:
I'm experiencing an issue related to sstablesplit. I would like to understand 
if I am doing something wrong or there is an issue in the split process. The 
process fails randomly with the following exception:
{code}
ERROR 02:17:36 Error in ThreadPoolExecutor
java.lang.AssertionError: Data component is missing for 
sstable./tools/bin/../../data/data/system/compactions_in_progress-55080ab05d9c388690a4acb25fe1f77b/system-compactions_in_progress-ka-16
{code}

See attached output.log file. The process never stops after this exception and 
I've also seen the dataset growing indefinitely (number of sstables).  

* I have not been able to reproduce the issue with a single sstablesplit command. 
ie, specifying all files with glob matching.
* I can reproduce the bug if I call multiple sstablesplit on a single file (the way 
ccm does)

Here is the test case file to reproduce the bug:

https://drive.google.com/file/d/0BwZ_GPM33j6KdVh0NTdkOWV2R1E/view?usp=sharing

1. Download the split_issue.tar.gz file. It includes latest cassandra-2.1 
branch binaries.
2. Extract it
3. CD inside the use case directory
4. Download the dataset (2G) just to be sure we have the same thing, and place 
it in the working directory.
   https://docs.google.com/uc?id=0BwZ_GPM33j6KV3ViNnpPcVFndUU&export=download
5. The first time, run ./test.sh. This will setup and run a test.
6. The next times, you can only run ./test --no-setup . This will only reset 
the dataset as its initial state and re-run the test. You might have to run the 
tests some times before experiencing it... but I'm always able with only 2-3 
runs.



 sstablesplit fails *randomly* with Data component is missing
 

 Key: CASSANDRA-8623
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8623
 Project: Cassandra
  Issue Type: Bug
Reporter: Alan Boudreault
Assignee: Marcus Eriksson
 Attachments: output.log


 I'm experiencing an issue related to sstablesplit. I would like to understand 
 if I am doing something wrong or there is an issue in the split process. The 
 process fails randomly with the following exception:
 {code}
 ERROR 02:17:36 Error in ThreadPoolExecutor
 java.lang.AssertionError: Data component is missing for 
 sstable./tools/bin/../../data/data/system/compactions_in_progress-55080ab05d9c388690a4acb25fe1f77b/system-compactions_in_progress-ka-16
 {code}
 See attached output.log file. The process never stops after this exception 
 and I've also seen the dataset growing indefinitely (number of sstables).  
 * I have not been able to reproduce the issue with a single sstablesplit 
 command. ie, specifying all files with glob matching.
 * I can reproduce the bug if I call multiple sstablesplit on a single file (the 
 way ccm does)
 Here is the test case file to reproduce the bug:
 https://drive.google.com/file/d/0BwZ_GPM33j6KdVh0NTdkOWV2R1E/view?usp=sharing
 1. Download the split_issue.tar.gz file. It includes latest cassandra-2.1 
 branch binaries.
 2. Extract it
 3. CD inside the use case directory
 4. Download the dataset (2G) just 

[jira] [Updated] (CASSANDRA-8623) sstablesplit fails *randomly* with Data component is missing

2015-01-14 Thread Alan Boudreault (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8623?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Alan Boudreault updated CASSANDRA-8623:
---
Description: 
I'm experiencing an issue related to sstablesplit. I would like to understand 
if I am doing something wrong or there is an issue in the split process. The 
process fails randomly with the following exception:
{code}
ERROR 02:17:36 Error in ThreadPoolExecutor
java.lang.AssertionError: Data component is missing for 
sstable./tools/bin/../../data/data/system/compactions_in_progress-55080ab05d9c388690a4acb25fe1f77b/system-compactions_in_progress-ka-16
{code}

See attached output.log file. The process never stops after this exception and 
I've also seen the dataset growing indefinitely (number of sstables).  

* I have not been able to reproduce the issue with a single sstablesplit 
command. ie, specifying all files with glob matching.
* I can reproduce the bug if I call multiple sstablesplit on a single file (the 
way ccm does)

Here is the test case file to reproduce the bug:

https://drive.google.com/file/d/0BwZ_GPM33j6KdVh0NTdkOWV2R1E/view?usp=sharing

1. Download the split_issue.tar.gz file. It includes latest cassandra-2.1 
branch binaries.
2. Extract it
3. CD inside the use case directory
4. Download the dataset (2G) just to be sure we have the same thing, and place 
it in the working directory.
   https://docs.google.com/uc?id=0BwZ_GPM33j6KV3ViNnpPcVFndUU&export=download
5. The first time, run ./test.sh. This will setup and run a test.
6. The next times, you can only run ./test --no-setup . This will only reset 
the dataset as its initial state and re-run the test. You might have to run the 
tests some times before experiencing it... but I'm always able with only 2-3 
runs.


  was:
I'm experiencing an issue related to sstablesplit. I would like to understand 
if I am doing something wrong or there is an issue in the split process. The 
process fails randomly with the following exception:
{code}
ERROR 02:17:36 Error in ThreadPoolExecutor
java.lang.AssertionError: Data component is missing for 
sstable./tools/bin/../../data/data/system/compactions_in_progress-55080ab05d9c388690a4acb25fe1f77b/system-compactions_in_progress-ka-16
{code}

See attached output.log file. The process never stops after this exception and 
I've also seen the dataset growing indefinitely (number of sstables).  

* I have not been able to reproduce the issue with a single sstablesplit 
command. ie, specifying all files with glob matching.
* I can reproduce the bug if I call multiple sstablesplit on a single file (the way 
ccm does)

Here is the test case file to reproduce the bug:

https://drive.google.com/file/d/0BwZ_GPM33j6KdVh0NTdkOWV2R1E/view?usp=sharing

1. Download the split_issue.tar.gz file. It includes latest cassandra-2.1 
branch binaries.
2. Extract it
3. CD inside the use case directory
4. Download the dataset (2G) just to be sure we have the same thing, and place 
it in the working directory.
   https://docs.google.com/uc?id=0BwZ_GPM33j6KV3ViNnpPcVFndUU&export=download
5. The first time, run ./test.sh. This will setup and run a test.
6. The next times, you can only run ./test --no-setup . This will only reset 
the dataset as its initial state and re-run the test. You might have to run the 
tests some times before experiencing it... but I'm always able with only 2-3 
runs.



 sstablesplit fails *randomly* with Data component is missing
 

 Key: CASSANDRA-8623
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8623
 Project: Cassandra
  Issue Type: Bug
Reporter: Alan Boudreault
Assignee: Marcus Eriksson
 Attachments: output.log


 I'm experiencing an issue related to sstablesplit. I would like to understand 
 if I am doing something wrong or there is an issue in the split process. The 
 process fails randomly with the following exception:
 {code}
 ERROR 02:17:36 Error in ThreadPoolExecutor
 java.lang.AssertionError: Data component is missing for 
 sstable./tools/bin/../../data/data/system/compactions_in_progress-55080ab05d9c388690a4acb25fe1f77b/system-compactions_in_progress-ka-16
 {code}
 See attached output.log file. The process never stops after this exception 
 and I've also seen the dataset growing indefinitely (number of sstables).  
 * I have not been able to reproduce the issue with a single sstablesplit 
 command. ie, specifying all files with glob matching.
 * I can reproduce the bug if I call multiple sstablesplit on a single file 
 (the way ccm does)
 Here is the test case file to reproduce the bug:
 https://drive.google.com/file/d/0BwZ_GPM33j6KdVh0NTdkOWV2R1E/view?usp=sharing
 1. Download the split_issue.tar.gz file. It includes latest cassandra-2.1 
 branch binaries.
 2. Extract it
 3. CD inside the use case directory
 4. Download the 

[jira] [Created] (CASSANDRA-8623) sstablesplit fails *randomly* with Data component is missing

2015-01-14 Thread Alan Boudreault (JIRA)
Alan Boudreault created CASSANDRA-8623:
--

 Summary: sstablesplit fails *randomly* with Data component is 
missing
 Key: CASSANDRA-8623
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8623
 Project: Cassandra
  Issue Type: Bug
Reporter: Alan Boudreault
Assignee: Marcus Eriksson
 Attachments: output.log

I'm experiencing an issue related to sstablesplit. I would like to understand 
if I am doing something wrong or there is an issue in the split process. The 
process fails randomly with the following exception:
{code}
ERROR 02:17:36 Error in ThreadPoolExecutor
java.lang.AssertionError: Data component is missing for 
sstable./tools/bin/../../data/data/system/compactions_in_progress-55080ab05d9c388690a4acb25fe1f77b/system-compactions_in_progress-ka-16
{code}

See attached output.log file. The process never stops after this exception and 
I've also seen the dataset growing indefinitely (number of sstables).  

* I have not been able to reproduce the issue with a single sstablesplit command. 
ie, specifying all files with glob matching.
* I can reproduce the bug if I call multiple sstablesplit on a single file (the way 
ccm does)

Here is the test case file to reproduce the bug:

https://drive.google.com/file/d/0BwZ_GPM33j6KdVh0NTdkOWV2R1E/view?usp=sharing

1. Download the split_issue.tar.gz file. It includes latest cassandra-2.1 
branch binaries.
2. Extract it
3. CD inside the use case directory
4. Download the dataset (2G) just to be sure we have the same thing, and place 
it in the working directory.
   https://docs.google.com/uc?id=0BwZ_GPM33j6KV3ViNnpPcVFndUU&export=download
5. The first time, run ./test.sh. This will setup and run a test.
6. The next times, you can only run ./test --no-setup . This will only reset 
the dataset as its initial state and re-run the test. You might have to run the 
tests some times before experiencing it... but I'm always able with only 2-3 
runs.




--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Benedict (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14277131#comment-14277131
 ] 

Benedict commented on CASSANDRA-8614:
-

Perhaps. But people might run C* with different VMs, and so result in a bad 
choice (i.e. might try JDK8, find it unstable, and rollback - but now have all 
their data with more expensive checksums).

We should probably also detect the hardware to decide if running with native 
CRC is a good thing, versus Adler. Or perhaps perform some benchmarks on other 
hardware. It's likely still slower on hardware without the native instructions.

 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8614
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Ariel Weisberg
  Labels: performance
 Attachments: 8614.patch


 JDK 8 has support for an intrinsic for CRC32 that runs at 12-13 gigabytes/sec 
 per core in my quick and dirty test. PureJavaCRC32 is < 800 megabytes/sec if 
 I recall and it has a lookup table that evicts random cache lines every time 
 it runs.
 In order to capture the benefit of that when it is available we can select a 
 CRC32 implementation at startup in a static block.
 If JDK 8 is not what is running we can fall back to the existing 
 PureJavaCRC32 implementation.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[1/2] cassandra git commit: Fix NPE when passing wrong argument in ALTER TABLE statement

2015-01-14 Thread aleksey
Repository: cassandra
Updated Branches:
  refs/heads/trunk 7a4752843 -> 270a05d63


Fix NPE when passing wrong argument in ALTER TABLE statement

patch by Benjamin Lerer; reviewed by Robert Stupp for CASSANDRA-8355


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/25d4a13d
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/25d4a13d
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/25d4a13d

Branch: refs/heads/trunk
Commit: 25d4a13d5621f11290af83b81b0be1588ffc87db
Parents: cf07fc2
Author: Benjamin Lerer b_le...@hotmail.com
Authored: Wed Jan 14 19:18:49 2015 +0300
Committer: Aleksey Yeschenko alek...@apache.org
Committed: Wed Jan 14 19:18:49 2015 +0300

--
 CHANGES.txt  |  1 +
 src/java/org/apache/cassandra/cql3/Cql.g | 15 +--
 .../org/apache/cassandra/cql3/AlterTableTest.java| 11 +--
 3 files changed, 19 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/25d4a13d/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 175a78a..8f312a7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.1.3
+ * Fix NPE when passing wrong argument in ALTER TABLE statement 
(CASSANDRA-8355)
  * Pig: Refactor and deprecate CqlStorage (CASSANDRA-8599)
  * Don't reuse the same cleanup strategy for all sstables (CASSANDRA-8537)
  * Fix case-sensitivity of index name on CREATE and DROP INDEX

http://git-wip-us.apache.org/repos/asf/cassandra/blob/25d4a13d/src/java/org/apache/cassandra/cql3/Cql.g
--
diff --git a/src/java/org/apache/cassandra/cql3/Cql.g 
b/src/java/org/apache/cassandra/cql3/Cql.g
index eda0529..9067fc4 100644
--- a/src/java/org/apache/cassandra/cql3/Cql.g
+++ b/src/java/org/apache/cassandra/cql3/Cql.g
@@ -942,16 +942,19 @@ functionName returns [String s]
 | K_TOKEN   { $s = token; }
 ;
 
-functionArgs returns [ListTerm.Raw a]
-: '(' ')' { $a = Collections.emptyList(); }
-| '(' t1=term { ListTerm.Raw args = new ArrayListTerm.Raw(); 
args.add(t1); }
-  ( ',' tn=term { args.add(tn); } )*
-   ')' { $a = args; }
+function returns [Term.Raw t]
+: f=functionName '(' ')'   { $t = new FunctionCall.Raw(f, 
Collections.Term.RawemptyList()); }
+| f=functionName '(' args=functionArgs ')' { $t = new FunctionCall.Raw(f, 
args); }
+;
+
+functionArgs returns [ListTerm.Raw args]
+@init{ $args = new ArrayListTerm.Raw(); }
+: t1=term {args.add(t1); } ( ',' tn=term { args.add(tn); } )*
 ;
 
 term returns [Term.Raw term]
 : v=value  { $term = v; }
-| f=functionName args=functionArgs { $term = new FunctionCall.Raw(f, 
args); }
+| f=function   { $term = f; }
 | '(' c=comparatorType ')' t=term  { $term = new TypeCast(c, t); }
 ;
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/25d4a13d/test/unit/org/apache/cassandra/cql3/AlterTableTest.java
--
diff --git a/test/unit/org/apache/cassandra/cql3/AlterTableTest.java 
b/test/unit/org/apache/cassandra/cql3/AlterTableTest.java
index f5747ed..9668a41 100644
--- a/test/unit/org/apache/cassandra/cql3/AlterTableTest.java
+++ b/test/unit/org/apache/cassandra/cql3/AlterTableTest.java
@@ -19,8 +19,6 @@ package org.apache.cassandra.cql3;
 
 import org.junit.Test;
 
-import org.apache.cassandra.exceptions.InvalidRequestException;
-
 public class AlterTableTest extends CQLTester
 {
 @Test
@@ -83,4 +81,13 @@ public class AlterTableTest extends CQLTester
 
assertInvalid("ALTER TABLE %s ADD myCollection map<int, int>;");
 }
+
+@Test
+public void testChangeStrategyWithUnquotedAgrument() throws Throwable
+{
+createTable("CREATE TABLE %s (id text PRIMARY KEY);");
+
+assertInvalidSyntaxMessage("no viable alternative at input '}'",
+   "ALTER TABLE %s WITH caching = {'keys' : 
'all', 'rows_per_partition' : ALL};");
+}
 }



[jira] [Commented] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Ariel Weisberg (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277185#comment-14277185
 ] 

Ariel Weisberg commented on CASSANDRA-8614:
---

It seems like the fallback in JDK 8 is to a slicing table based implementation, 
but it isn't the slicing-by-8 algorithm; that one was not considered enough of an improvement.

http://mail.openjdk.java.net/pipermail/aarch64-port-dev/2014-May/001113.html
http://people.linaro.org/~edward.nevill/crc32/crc32.patch

I wonder how safe it is to assume that hardware running C* will have it. It was 
introduced in 2010 http://en.wikipedia.org/wiki/CLMUL_instruction_set There is 
also the question of whether it is enabled in a virtualized environment.

I will benchmark the fallback option and see how it compares to PureJavaCRC32.


 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8614
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Ariel Weisberg
Assignee: Benedict
  Labels: performance
 Attachments: 8614.patch, Sample.java


 JDK 8 has support for an intrinsic for CRC32 that runs at 12-13 gigabytes/sec 
 per core in my quick and dirty test. PureJavaCRC32 is  800 megabytes/sec if 
 I recall and it has a lookup table that evicts random cache lines every time 
 it runs.
 In order to capture the benefit of that when it is available we can select a 
 CRC32 implementation at startup in a static block.
 If JDK 8 is not what is running we can fall back to the existing 
 PureJavaCRC32 implementation.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Comment Edited] (CASSANDRA-8292) From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in variable: [cassandra.config]. Please prefix the file with file:/// for loca

2015-01-14 Thread Brandon Kearby (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8292?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277049#comment-14277049
 ] 

Brandon Kearby edited comment on CASSANDRA-8292 at 1/14/15 3:44 PM:


Hey Guys, we have this same issue again. It's showing up in a different place 
in the pig client code. I've attached a diff, 'patch.txt', showing where I 
removed them.


was (Author: brandon.kearby):
Hey Guys, we have this same issue again. It's showing up on a different place 
in the pig client code. I've attached a diff of where I removed them.

 From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting 
 URI in variable: [cassandra.config].  Please prefix the file with file:/// 
 for local files or file://server/ for remote files.
 

 Key: CASSANDRA-8292
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8292
 Project: Cassandra
  Issue Type: Bug
Reporter: Brandon Kearby
Assignee: Joshua McKenzie
 Fix For: 2.1.3

 Attachments: 8292_v1.txt, patch.txt


 Getting this error from Pig:
 Looks like the client side hadoop code is trying to locate the cassandra.yaml.
 {code}
 ERROR org.apache.cassandra.config.DatabaseDescriptor - Fatal configuration 
 error
 org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in 
 variable: [cassandra.config].  Please prefix the file with file:/// for local 
 files or file://server/ for remote files.  Aborting.
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.getStorageConfigURL(YamlConfigurationLoader.java:73)
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.loadConfig(YamlConfigurationLoader.java:84)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.loadConfig(DatabaseDescriptor.java:158)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.clinit(DatabaseDescriptor.java:133)
   at 
 org.apache.cassandra.utils.JVMStabilityInspector.inspectThrowable(JVMStabilityInspector.java:54)
   at 
 org.apache.cassandra.hadoop.HadoopCompat.clinit(HadoopCompat.java:135)
   at 
 org.apache.cassandra.hadoop.AbstractColumnFamilyInputFormat.getSplits(AbstractColumnFamilyInputFormat.java:120)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.getSplits(PigInputFormat.java:273)
   at 
 org.apache.hadoop.mapred.JobClient.writeNewSplits(JobClient.java:1014)
   at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1031)
   at org.apache.hadoop.mapred.JobClient.access$600(JobClient.java:172)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:943)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:896)
   at java.security.AccessController.doPrivileged(Native Method)
   at javax.security.auth.Subject.doAs(Subject.java:422)
   at 
 org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
   at 
 org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:896)
   at org.apache.hadoop.mapreduce.Job.submit(Job.java:531)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.submit(ControlledJob.java:318)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.startReadyJobs(JobControl.java:238)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.run(JobControl.java:269)
   at java.lang.Thread.run(Thread.java:745)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher$1.run(MapReduceLauncher.java:260)
 Expecting URI in variable: [cassandra.config].  Please prefix the file with 
 file:/// for local files or file://server/ for remote files.  Aborting.
 Fatal configuration error; unable to start. See log for stacktrace.
 {code}
 Sample Pig Script:
 {code}
 grunt sigs = load 'cql://socialdata/signal' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt a = limit sigs 5;  
 
 grunt dump a;
 {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Benedict (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277113#comment-14277113
 ] 

Benedict commented on CASSANDRA-8614:
-

I don't really mind. Both can be error prone - CompletableFuture could be 
included as a library in an older VM. Both are _unlikely_ to cause problems, 
and the worst outcome is a bad decision about performance, not a catastrophe.

 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8614
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Ariel Weisberg
  Labels: performance
 Attachments: 8614.patch


 JDK 8 has support for an intrinsic for CRC32 that runs at 12-13 gigabytes/sec 
 per core in my quick and dirty test. PureJavaCRC32 is  800 megabytes/sec if 
 I recall and it has a lookup table that evicts random cache lines every time 
 it runs.
 In order to capture the benefit of that when it is available we can select a 
 CRC32 implementation at startup in a static block.
 If JDK 8 is not what is running we can fall back to the existing 
 PureJavaCRC32 implementation.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Ariel Weisberg (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277120#comment-14277120
 ] 

Ariel Weisberg commented on CASSANDRA-8614:
---

What release would we want this to go in? 3.0? Will 3.0 require JDK 8?

Does it make sense to drive the choice for creating checksummed data based on 
what version of the JDK is running so people who use Java 8 at runtime can get 
the benefit even if Java 8 is not required?

 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8614
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Ariel Weisberg
  Labels: performance
 Attachments: 8614.patch


 JDK 8 has support for an intrinsic for CRC32 that runs at 12-13 gigabytes/sec 
 per core in my quick and dirty test. PureJavaCRC32 is  800 megabytes/sec if 
 I recall and it has a lookup table that evicts random cache lines every time 
 it runs.
 In order to capture the benefit of that when it is available we can select a 
 CRC32 implementation at startup in a static block.
 If JDK 8 is not what is running we can fall back to the existing 
 PureJavaCRC32 implementation.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[2/2] cassandra git commit: Merge branch 'cassandra-2.1' into trunk

2015-01-14 Thread aleksey
Merge branch 'cassandra-2.1' into trunk


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/270a05d6
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/270a05d6
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/270a05d6

Branch: refs/heads/trunk
Commit: 270a05d63fb4fb6425b3f01abbdd63243dd955e8
Parents: 7a47528 25d4a13
Author: Aleksey Yeschenko alek...@apache.org
Authored: Wed Jan 14 19:20:32 2015 +0300
Committer: Aleksey Yeschenko alek...@apache.org
Committed: Wed Jan 14 19:20:32 2015 +0300

--
 CHANGES.txt  |  1 +
 src/java/org/apache/cassandra/cql3/Cql.g | 15 +--
 .../org/apache/cassandra/cql3/AlterTableTest.java| 11 +--
 3 files changed, 19 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/270a05d6/CHANGES.txt
--
diff --cc CHANGES.txt
index 937515e,8f312a7..714863f
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,54 -1,5 +1,55 @@@
 +3.0
 + * Group sstables for anticompaction correctly (CASSANDRA-8578)
 + * Add ReadFailureException to native protocol, respond
 +   immediately when replicas encounter errors while handling
 +   a read request (CASSANDRA-7886)
 + * Switch CommitLogSegment from RandomAccessFile to nio (CASSANDRA-8308)
 + * Allow mixing token and partition key restrictions (CASSANDRA-7016)
 + * Support index key/value entries on map collections (CASSANDRA-8473)
 + * Modernize schema tables (CASSANDRA-8261)
 + * Support for user-defined aggregation functions (CASSANDRA-8053)
 + * Fix NPE in SelectStatement with empty IN values (CASSANDRA-8419)
 + * Refactor SelectStatement, return IN results in natural order instead
 +   of IN value list order (CASSANDRA-7981)
 + * Support UDTs, tuples, and collections in user-defined
 +   functions (CASSANDRA-7563)
 + * Fix aggregate fn results on empty selection, result column name,
 +   and cqlsh parsing (CASSANDRA-8229)
 + * Mark sstables as repaired after full repair (CASSANDRA-7586)
 + * Extend Descriptor to include a format value and refactor reader/writer
 +   APIs (CASSANDRA-7443)
 + * Integrate JMH for microbenchmarks (CASSANDRA-8151)
 + * Keep sstable levels when bootstrapping (CASSANDRA-7460)
 + * Add Sigar library and perform basic OS settings check on startup 
(CASSANDRA-7838)
 + * Support for aggregation functions (CASSANDRA-4914)
 + * Remove cassandra-cli (CASSANDRA-7920)
 + * Accept dollar quoted strings in CQL (CASSANDRA-7769)
 + * Make assassinate a first class command (CASSANDRA-7935)
 + * Support IN clause on any clustering column (CASSANDRA-4762)
 + * Improve compaction logging (CASSANDRA-7818)
 + * Remove YamlFileNetworkTopologySnitch (CASSANDRA-7917)
 + * Do anticompaction in groups (CASSANDRA-6851)
 + * Support user-defined functions (CASSANDRA-7395, 7526, 7562, 7740, 7781, 
7929,
 +   7924, 7812, 8063, 7813, 7708)
 + * Permit configurable timestamps with cassandra-stress (CASSANDRA-7416)
 + * Move sstable RandomAccessReader to nio2, which allows using the
 +   FILE_SHARE_DELETE flag on Windows (CASSANDRA-4050)
 + * Remove CQL2 (CASSANDRA-5918)
 + * Add Thrift get_multi_slice call (CASSANDRA-6757)
 + * Optimize fetching multiple cells by name (CASSANDRA-6933)
 + * Allow compilation in java 8 (CASSANDRA-7028)
 + * Make incremental repair default (CASSANDRA-7250)
 + * Enable code coverage thru JaCoCo (CASSANDRA-7226)
 + * Switch external naming of 'column families' to 'tables' (CASSANDRA-4369) 
 + * Shorten SSTable path (CASSANDRA-6962)
 + * Use unsafe mutations for most unit tests (CASSANDRA-6969)
 + * Fix race condition during calculation of pending ranges (CASSANDRA-7390)
 + * Fail on very large batch sizes (CASSANDRA-8011)
 + * Improve concurrency of repair (CASSANDRA-6455, 8208)
 +
 +
  2.1.3
+  * Fix NPE when passing wrong argument in ALTER TABLE statement 
(CASSANDRA-8355)
   * Pig: Refactor and deprecate CqlStorage (CASSANDRA-8599)
   * Don't reuse the same cleanup strategy for all sstables (CASSANDRA-8537)
   * Fix case-sensitivity of index name on CREATE and DROP INDEX

http://git-wip-us.apache.org/repos/asf/cassandra/blob/270a05d6/src/java/org/apache/cassandra/cql3/Cql.g
--
diff --cc src/java/org/apache/cassandra/cql3/Cql.g
index d8496b1,9067fc4..d73dc28
--- a/src/java/org/apache/cassandra/cql3/Cql.g
+++ b/src/java/org/apache/cassandra/cql3/Cql.g
@@@ -1038,23 -936,20 +1038,26 @@@ intValue returns [Term.Raw value
  | QMARK { $value = newBindVariables(null); }
  ;
  
 -functionName returns [String s]
 -: f=IDENT   { $s = $f.text; }
 +functionName returns [FunctionName s]
 +: (ks=keyspaceName '.')? 

[jira] [Commented] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Benedict (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277160#comment-14277160
 ] 

Benedict commented on CASSANDRA-8614:
-

It would be great to run this on a non-modern system, or an AMD box, and see 
what the differential is.

 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8614
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Ariel Weisberg
  Labels: performance
 Attachments: 8614.patch, Sample.java


 JDK 8 has support for an intrinsic for CRC32 that runs at 12-13 gigabytes/sec 
 per core in my quick and dirty test. PureJavaCRC32 is  800 megabytes/sec if 
 I recall and it has a lookup table that evicts random cache lines every time 
 it runs.
 In order to capture the benefit of that when it is available we can select a 
 CRC32 implementation at startup in a static block.
 If JDK 8 is not what is running we can fall back to the existing 
 PureJavaCRC32 implementation.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Ariel Weisberg (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277162#comment-14277162
 ] 

Ariel Weisberg commented on CASSANDRA-8614:
---

If you switch outright how do you read tables that were written using the old 
checksum? I assumed the table metadata already contained the checksum type.

 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8614
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Ariel Weisberg
  Labels: performance
 Attachments: 8614.patch, Sample.java


 JDK 8 has support for an intrinsic for CRC32 that runs at 12-13 gigabytes/sec 
 per core in my quick and dirty test. PureJavaCRC32 is  800 megabytes/sec if 
 I recall and it has a lookup table that evicts random cache lines every time 
 it runs.
 In order to capture the benefit of that when it is available we can select a 
 CRC32 implementation at startup in a static block.
 If JDK 8 is not what is running we can fall back to the existing 
 PureJavaCRC32 implementation.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Aleksey Yeschenko (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277115#comment-14277115
 ] 

Aleksey Yeschenko commented on CASSANDRA-8614:
--

Once we require JDK8, we might want to switch our sstable checksums back to CRC 
(from Adler), if intrinsic CRC32 happens to be faster (which it should).

 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8614
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Ariel Weisberg
  Labels: performance
 Attachments: 8614.patch


 JDK 8 has support for an intrinsic for CRC32 that runs at 12-13 gigabytes/sec 
 per core in my quick and dirty test. PureJavaCRC32 is  800 megabytes/sec if 
 I recall and it has a lookup table that evicts random cache lines every time 
 it runs.
 In order to capture the benefit of that when it is available we can select a 
 CRC32 implementation at startup in a static block.
 If JDK 8 is not what is running we can fall back to the existing 
 PureJavaCRC32 implementation.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8550) Internal pagination in CQL3 index queries creating substantial overhead

2015-01-14 Thread Benjamin Lerer (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8550?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277121#comment-14277121
 ] 

Benjamin Lerer commented on CASSANDRA-8550:
---

LGTM

 Internal pagination in CQL3 index queries creating substantial overhead
 ---

 Key: CASSANDRA-8550
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8550
 Project: Cassandra
  Issue Type: Bug
  Components: Core
Reporter: Samuel Klock
Assignee: Tyler Hobbs
 Fix For: 2.0.12, 2.1.3

 Attachments: 8550-2.0.txt


 While benchmarking CQL3 secondary indexes in 2.1.2, we've noticed substantial 
 performance degradation as the volume of indexed data increases.  In trying 
 to figure out what's going on, we found that a major factor contributing to 
 this degradation appears to be logic in 
 {{o.a.c.db.index.composites.CompositesSearcher}} used to paginate scans of 
 index tables.  In particular, in the use cases we've explored, this short 
 algorithm used to select a page size appears to be the culprit:
 {code:java}
 private int meanColumns = 
 Math.max(index.getIndexCfs().getMeanColumns(), 1);
 // We shouldn't fetch only 1 row as this provides buggy paging in 
 case the first row doesn't satisfy all clauses
 private int rowsPerQuery = Math.max(Math.min(filter.maxRows(), 
 filter.maxColumns() / meanColumns), 2);
 {code}
 In indexes where the cardinality doesn't scale linearly with the volume of 
 data indexed, it seems likely that the value of {{meanColumns}} will steadily 
 rise in write-heavy workloads.  In the cases we've explored, 
 {{filter.maxColumns()}} returns a small enough number (related to the lesser 
 of the native-protocol page size or the user-specified limit for the query) 
 that, after {{meanColumns}} reaches a few thousand, {{rowsPerQuery}} (the 
 page size) is consistently set to 2.
 The resulting overhead is severe.  In our environment, if we fix 
 {{rowsPerQuery}} to some reasonably large constant (e.g., 5,000), queries 
 that with the existing logic would require over two minutes to complete can 
 run in under ten seconds.
 Using a constant clearly seems like the wrong answer.  But the overhead the 
 existing algorithm seems to introduce suggests that it isn't the right answer 
 either.  An intuitive solution might be to use the minimum of 
 {{filter.maxRows()}} and {{filter.maxColumns()}} (or 2 if both of those are 
 1), but it's not immediately clear that there aren't safety considerations 
 the algorithm is attempting to account for that this strategy does not.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Aleksey Yeschenko (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277139#comment-14277139
 ] 

Aleksey Yeschenko commented on CASSANDRA-8614:
--

3.0 will not require JDK8, but 3.1 probably should. As things stand right 
now, the checksum algo depends strictly on the version in the descriptor, and 
those are static.

We could probably encode the algo used in metadata, easily, so the current 
scheme is not a big deal of a limitation in itself, but I'd slightly prefer 
switching in 3.1, unconditionally.

 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8614
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Ariel Weisberg
  Labels: performance
 Attachments: 8614.patch


 JDK 8 has support for an intrinsic for CRC32 that runs at 12-13 gigabytes/sec 
 per core in my quick and dirty test. PureJavaCRC32 is  800 megabytes/sec if 
 I recall and it has a lookup table that evicts random cache lines every time 
 it runs.
 In order to capture the benefit of that when it is available we can select a 
 CRC32 implementation at startup in a static block.
 If JDK 8 is not what is running we can fall back to the existing 
 PureJavaCRC32 implementation.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Benedict (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277117#comment-14277117
 ] 

Benedict commented on CASSANDRA-8614:
-

bq. we -might want- probably should

 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8614
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Ariel Weisberg
  Labels: performance
 Attachments: 8614.patch


 JDK 8 has support for an intrinsic for CRC32 that runs at 12-13 gigabytes/sec 
 per core in my quick and dirty test. PureJavaCRC32 is  800 megabytes/sec if 
 I recall and it has a lookup table that evicts random cache lines every time 
 it runs.
 In order to capture the benefit of that when it is available we can select a 
 CRC32 implementation at startup in a static block.
 If JDK 8 is not what is running we can fall back to the existing 
 PureJavaCRC32 implementation.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


cassandra git commit: Fix NPE when passing wrong argument in ALTER TABLE statement

2015-01-14 Thread aleksey
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.1 cf07fc259 - 25d4a13d5


Fix NPE when passing wrong argument in ALTER TABLE statement

patch by Benjamin Lerer; reviewed by Robert Stupp for CASSANDRA-8355


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/25d4a13d
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/25d4a13d
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/25d4a13d

Branch: refs/heads/cassandra-2.1
Commit: 25d4a13d5621f11290af83b81b0be1588ffc87db
Parents: cf07fc2
Author: Benjamin Lerer b_le...@hotmail.com
Authored: Wed Jan 14 19:18:49 2015 +0300
Committer: Aleksey Yeschenko alek...@apache.org
Committed: Wed Jan 14 19:18:49 2015 +0300

--
 CHANGES.txt  |  1 +
 src/java/org/apache/cassandra/cql3/Cql.g | 15 +--
 .../org/apache/cassandra/cql3/AlterTableTest.java| 11 +--
 3 files changed, 19 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/25d4a13d/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 175a78a..8f312a7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.1.3
+ * Fix NPE when passing wrong argument in ALTER TABLE statement 
(CASSANDRA-8355)
  * Pig: Refactor and deprecate CqlStorage (CASSANDRA-8599)
  * Don't reuse the same cleanup strategy for all sstables (CASSANDRA-8537)
  * Fix case-sensitivity of index name on CREATE and DROP INDEX

http://git-wip-us.apache.org/repos/asf/cassandra/blob/25d4a13d/src/java/org/apache/cassandra/cql3/Cql.g
--
diff --git a/src/java/org/apache/cassandra/cql3/Cql.g 
b/src/java/org/apache/cassandra/cql3/Cql.g
index eda0529..9067fc4 100644
--- a/src/java/org/apache/cassandra/cql3/Cql.g
+++ b/src/java/org/apache/cassandra/cql3/Cql.g
@@ -942,16 +942,19 @@ functionName returns [String s]
 | K_TOKEN   { $s = token; }
 ;
 
-functionArgs returns [List<Term.Raw> a]
-: '(' ')' { $a = Collections.<Term.Raw>emptyList(); }
-| '(' t1=term { List<Term.Raw> args = new ArrayList<Term.Raw>(); 
args.add(t1); }
-  ( ',' tn=term { args.add(tn); } )*
-   ')' { $a = args; }
+function returns [Term.Raw t]
+: f=functionName '(' ')'   { $t = new FunctionCall.Raw(f, 
Collections.<Term.Raw>emptyList()); }
+| f=functionName '(' args=functionArgs ')' { $t = new FunctionCall.Raw(f, 
args); }
+;
+
+functionArgs returns [List<Term.Raw> args]
+@init{ $args = new ArrayList<Term.Raw>(); }
+: t1=term {args.add(t1); } ( ',' tn=term { args.add(tn); } )*
 ;
 
 term returns [Term.Raw term]
 : v=value  { $term = v; }
-| f=functionName args=functionArgs { $term = new FunctionCall.Raw(f, 
args); }
+| f=function   { $term = f; }
 | '(' c=comparatorType ')' t=term  { $term = new TypeCast(c, t); }
 ;
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/25d4a13d/test/unit/org/apache/cassandra/cql3/AlterTableTest.java
--
diff --git a/test/unit/org/apache/cassandra/cql3/AlterTableTest.java 
b/test/unit/org/apache/cassandra/cql3/AlterTableTest.java
index f5747ed..9668a41 100644
--- a/test/unit/org/apache/cassandra/cql3/AlterTableTest.java
+++ b/test/unit/org/apache/cassandra/cql3/AlterTableTest.java
@@ -19,8 +19,6 @@ package org.apache.cassandra.cql3;
 
 import org.junit.Test;
 
-import org.apache.cassandra.exceptions.InvalidRequestException;
-
 public class AlterTableTest extends CQLTester
 {
 @Test
@@ -83,4 +81,13 @@ public class AlterTableTest extends CQLTester
 
assertInvalid("ALTER TABLE %s ADD myCollection map<int, int>;");
 }
+
+@Test
+public void testChangeStrategyWithUnquotedAgrument() throws Throwable
+{
+createTable("CREATE TABLE %s (id text PRIMARY KEY);");
+
+assertInvalidSyntaxMessage("no viable alternative at input '}'",
+   "ALTER TABLE %s WITH caching = {'keys' : 
'all', 'rows_per_partition' : ALL};");
+}
 }



[jira] [Updated] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Ariel Weisberg (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ariel Weisberg updated CASSANDRA-8614:
--
Attachment: Sample.java

JMH benchmark. For small sizes it's not as fast, but at a kilobyte it is many 
times faster. It also doesn't evict random cache lines so the impact could be 
larger than what the micro benchmark shows.

For large sizes it indeed does 13 gigabytes/sec which is pretty crazy.

There is a performance delta between direct and non-direct byte buffers in 
favor of direct byte buffers and the one case I looked at it was 2x faster.

{noformat}
 [java] Benchmark (byteSize)   Mode 
 Samples Score Error  Units
 [java] o.a.c.t.m.Sample.CRC32Array  128  thrpt 
   6  13905041.788 ±  598179.976  ops/s
 [java] o.a.c.t.m.Sample.PureJavaCrc32   128  thrpt 
   6  10525663.252 ±  507525.667  ops/s

 [java] o.a.c.t.m.Sample.CRC32Array  512  thrpt 
   6  14571599.254 ± 8930061.376  ops/s
 [java] o.a.c.t.m.Sample.PureJavaCrc32   512  thrpt 
   6   2835430.274 ±   92029.259  ops/s

 [java] o.a.c.t.m.Sample.CRC32Array 1024  thrpt 
   6   8337714.641 ± 3988493.638  ops/s
 [java] o.a.c.t.m.Sample.PureJavaCrc32  1024  thrpt 
   6   1428928.434 ±   31709.319  ops/s

 [java] o.a.c.t.m.Sample.CRC32Array  1048576  thrpt 
   6 12364.723 ± 344.434  ops/s
 [java] o.a.c.t.m.Sample.PureJavaCrc32   1048576  thrpt 
   6  1412.017 ±  89.214  ops/s

 [java] o.a.c.t.m.Sample.CRC32ByteBuffer 128  thrpt 
   6  15925509.375 ±  779733.985  ops/s
 [java] o.a.c.t.m.Sample.PureJavaCrc32ByteBuffer 128  thrpt 
   6  10446360.681 ±  599847.210  ops/s

 [java] o.a.c.t.m.Sample.CRC32ByteBuffer 512  thrpt 
   6  10906108.722 ±  346735.334  ops/s
 [java] o.a.c.t.m.Sample.PureJavaCrc32ByteBuffer 512  thrpt 
   6   2873179.754 ±  140004.771  ops/s

 [java] o.a.c.t.m.Sample.CRC32ByteBuffer1024  thrpt 
   6   6582936.616 ± 2219292.645  ops/s
 [java] o.a.c.t.m.Sample.PureJavaCrc32ByteBuffer1024  thrpt 
   6   1440343.345 ±   42303.806  ops/s

 [java] o.a.c.t.m.Sample.CRC32ByteBuffer 1048576  thrpt 
   6 12555.846 ± 514.918  ops/s
 [java] o.a.c.t.m.Sample.PureJavaCrc32ByteBuffer 1048576  thrpt 
   6  1414.886 ±  58.363  ops/s

 [java] o.a.c.t.m.Sample.CRC32ByteBufferDirect   128  thrpt 
   6  31786603.552 ± 2000265.643  ops/s
 [java] o.a.c.t.m.Sample.PureJavaCrc32ByteBufferDirect   128  thrpt 
   6   9169128.441 ±  296419.993  ops/s

 [java] o.a.c.t.m.Sample.CRC32ByteBufferDirect   512  thrpt 
   6  15768165.220 ±  589215.966  ops/s
 [java] o.a.c.t.m.Sample.PureJavaCrc32ByteBufferDirect   512  thrpt 
   6   2614215.362 ±  171099.973  ops/s

 [java] o.a.c.t.m.Sample.CRC32ByteBufferDirect  1024  thrpt 
   6   9846566.689 ±  447235.143  ops/s
 [java] o.a.c.t.m.Sample.PureJavaCrc32ByteBufferDirect  1024  thrpt 
   6   1327731.561 ±   41147.584  ops/s

 [java] o.a.c.t.m.Sample.CRC32ByteBufferDirect   1048576  thrpt 
   6 12467.127 ± 543.952  ops/s
 [java] o.a.c.t.m.Sample.PureJavaCrc32ByteBufferDirect   1048576  thrpt 
   6  1333.941 ±  20.311  ops/s


 [java] o.a.c.t.m.Sample.CRC32ByteBufferDirectWrapped128  thrpt 
   6  30545863.214 ± 2669919.886  ops/s
 [java] o.a.c.t.m.Sample.CRC32ByteBufferDirectWrapped512  thrpt 
   6  14929967.141 ± 1596223.606  ops/s
 [java] o.a.c.t.m.Sample.CRC32ByteBufferDirectWrapped   1024  thrpt 
   6   9408037.238 ±  564849.404  ops/s
 [java] o.a.c.t.m.Sample.CRC32ByteBufferDirectWrapped1048576  thrpt 
   6 12020.464 ± 417.515  ops/s
 [java] o.a.c.t.m.Sample.CRC32ByteBufferWrapped  128  thrpt 
   6  12996481.274 ± 9216253.478  ops/s
 [java] o.a.c.t.m.Sample.CRC32ByteBufferWrapped  512  thrpt 
   6   9632311.965 ± 4249496.365  ops/s
 [java] o.a.c.t.m.Sample.CRC32ByteBufferWrapped 1024  thrpt 
   6   7068335.746 ± 2112734.871  ops/s
 [java] o.a.c.t.m.Sample.CRC32ByteBufferWrapped  1048576  thrpt 
   6 12580.275 ± 838.737  ops/s
{noformat}

 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: 

[jira] [Assigned] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Benedict (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Benedict reassigned CASSANDRA-8614:
---

Assignee: Benedict

 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8614
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Ariel Weisberg
Assignee: Benedict
  Labels: performance
 Attachments: 8614.patch, Sample.java


 JDK 8 has support for an intrinsic for CRC32 that runs at 12-13 gigabytes/sec 
 per core in my quick and dirty test. PureJavaCRC32 is  800 megabytes/sec if 
 I recall and it has a lookup table that evicts random cache lines every time 
 it runs.
 In order to capture the benefit of that when it is available we can select a 
 CRC32 implementation at startup in a static block.
 If JDK 8 is not what is running we can fall back to the existing 
 PureJavaCRC32 implementation.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[1/2] cassandra git commit: Remove JVMStabilityInspector from hadoop code

2015-01-14 Thread jmckenzie
Repository: cassandra
Updated Branches:
  refs/heads/trunk 2fc9f3292 - c65a9f5c6


Remove JVMStabilityInspector from hadoop code

Patch by jmckenzie; reviewed by bwilliams as follow-up for CASSANDRA-8292


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/2ff91376
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/2ff91376
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/2ff91376

Branch: refs/heads/trunk
Commit: 2ff913767437ae919a72ec00c2437aa7ce904f6e
Parents: 7804a53
Author: Joshua McKenzie jmcken...@apache.org
Authored: Wed Jan 14 11:45:26 2015 -0600
Committer: Joshua McKenzie jmcken...@apache.org
Committed: Wed Jan 14 11:45:26 2015 -0600

--
 .../org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java| 2 --
 src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java   | 2 --
 src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java   | 4 +---
 src/java/org/apache/cassandra/hadoop/pig/CqlNativeStorage.java   | 1 -
 4 files changed, 1 insertion(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ff91376/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java
--
diff --git a/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java 
b/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java
index a1b04f0..d6a873b 100644
--- a/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java
+++ b/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java
@@ -26,7 +26,6 @@ import java.util.*;
 import org.apache.cassandra.dht.Range;
 import org.apache.cassandra.dht.Token;
 import org.apache.cassandra.thrift.*;
-import org.apache.cassandra.utils.JVMStabilityInspector;
 import org.apache.cassandra.utils.Pair;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
@@ -199,7 +198,6 @@ final class ColumnFamilyRecordWriter extends 
AbstractColumnFamilyRecordWriterBy
 }
 catch (Exception e)
 {
-JVMStabilityInspector.inspectThrowable(e);
 closeInternal();
 if (!iter.hasNext())
 {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ff91376/src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java
--
diff --git a/src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java 
b/src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java
index 702cae3..2096055 100644
--- a/src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java
+++ b/src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java
@@ -40,7 +40,6 @@ import org.apache.cassandra.hadoop.HadoopCompat;
 import org.apache.cassandra.thrift.*;
 import org.apache.cassandra.utils.ByteBufferUtil;
 import org.apache.cassandra.utils.FBUtilities;
-import org.apache.cassandra.utils.JVMStabilityInspector;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.util.Progressable;
@@ -255,7 +254,6 @@ class CqlRecordWriter extends 
AbstractColumnFamilyRecordWriterMapString, ByteB
 }
 catch (Exception e)
 {
-JVMStabilityInspector.inspectThrowable(e);
 closeInternal();
 if (!iter.hasNext())
 {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ff91376/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
--
diff --git a/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java 
b/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
index 6b22fac..1e6802b 100644
--- a/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
+++ b/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
@@ -35,7 +35,6 @@ import org.apache.cassandra.thrift.*;
 import org.apache.cassandra.utils.ByteBufferUtil;
 import org.apache.cassandra.utils.FBUtilities;
 import org.apache.cassandra.utils.Hex;
-import org.apache.cassandra.utils.JVMStabilityInspector;
 import org.apache.hadoop.mapreduce.*;
 import org.apache.pig.Expression;
 import org.apache.pig.ResourceSchema;
@@ -237,7 +236,6 @@ public class CassandraStorage extends 
AbstractCassandraStorage
 }
 catch (Exception e)
 {
-JVMStabilityInspector.inspectThrowable(e);
 cql3Table = true;
 }
 if (hasColumn)
@@ -727,7 +725,7 @@ public class 

[2/2] cassandra git commit: Merge branch 'cassandra-2.1' into trunk

2015-01-14 Thread jmckenzie
Merge branch 'cassandra-2.1' into trunk


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/c65a9f5c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/c65a9f5c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/c65a9f5c

Branch: refs/heads/trunk
Commit: c65a9f5c63f5e1a1a495cbe82b3d11bb3c848c20
Parents: 2fc9f32 2ff9137
Author: Joshua McKenzie jmcken...@apache.org
Authored: Wed Jan 14 11:46:42 2015 -0600
Committer: Joshua McKenzie jmcken...@apache.org
Committed: Wed Jan 14 11:46:42 2015 -0600

--
 .../org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java| 2 --
 src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java   | 2 --
 src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java   | 4 +---
 src/java/org/apache/cassandra/hadoop/pig/CqlNativeStorage.java   | 1 -
 4 files changed, 1 insertion(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/c65a9f5c/src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java
--

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c65a9f5c/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
--



cassandra git commit: Remove JVMStabilityInspector from hadoop code

2015-01-14 Thread jmckenzie
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.1 7804a53c9 -> 2ff913767


Remove JVMStabilityInspector from hadoop code

Patch by jmckenzie; reviewed by bwilliams as follow-up for CASSANDRA-8292


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/2ff91376
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/2ff91376
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/2ff91376

Branch: refs/heads/cassandra-2.1
Commit: 2ff913767437ae919a72ec00c2437aa7ce904f6e
Parents: 7804a53
Author: Joshua McKenzie jmcken...@apache.org
Authored: Wed Jan 14 11:45:26 2015 -0600
Committer: Joshua McKenzie jmcken...@apache.org
Committed: Wed Jan 14 11:45:26 2015 -0600

--
 .../org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java| 2 --
 src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java   | 2 --
 src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java   | 4 +---
 src/java/org/apache/cassandra/hadoop/pig/CqlNativeStorage.java   | 1 -
 4 files changed, 1 insertion(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ff91376/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java
--
diff --git a/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java 
b/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java
index a1b04f0..d6a873b 100644
--- a/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java
+++ b/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java
@@ -26,7 +26,6 @@ import java.util.*;
 import org.apache.cassandra.dht.Range;
 import org.apache.cassandra.dht.Token;
 import org.apache.cassandra.thrift.*;
-import org.apache.cassandra.utils.JVMStabilityInspector;
 import org.apache.cassandra.utils.Pair;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
@@ -199,7 +198,6 @@ final class ColumnFamilyRecordWriter extends 
AbstractColumnFamilyRecordWriterBy
 }
 catch (Exception e)
 {
-JVMStabilityInspector.inspectThrowable(e);
 closeInternal();
 if (!iter.hasNext())
 {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ff91376/src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java
--
diff --git a/src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java 
b/src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java
index 702cae3..2096055 100644
--- a/src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java
+++ b/src/java/org/apache/cassandra/hadoop/cql3/CqlRecordWriter.java
@@ -40,7 +40,6 @@ import org.apache.cassandra.hadoop.HadoopCompat;
 import org.apache.cassandra.thrift.*;
 import org.apache.cassandra.utils.ByteBufferUtil;
 import org.apache.cassandra.utils.FBUtilities;
-import org.apache.cassandra.utils.JVMStabilityInspector;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.util.Progressable;
@@ -255,7 +254,6 @@ class CqlRecordWriter extends 
AbstractColumnFamilyRecordWriterMapString, ByteB
 }
 catch (Exception e)
 {
-JVMStabilityInspector.inspectThrowable(e);
 closeInternal();
 if (!iter.hasNext())
 {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ff91376/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
--
diff --git a/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java 
b/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
index 6b22fac..1e6802b 100644
--- a/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
+++ b/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
@@ -35,7 +35,6 @@ import org.apache.cassandra.thrift.*;
 import org.apache.cassandra.utils.ByteBufferUtil;
 import org.apache.cassandra.utils.FBUtilities;
 import org.apache.cassandra.utils.Hex;
-import org.apache.cassandra.utils.JVMStabilityInspector;
 import org.apache.hadoop.mapreduce.*;
 import org.apache.pig.Expression;
 import org.apache.pig.ResourceSchema;
@@ -237,7 +236,6 @@ public class CassandraStorage extends 
AbstractCassandraStorage
 }
 catch (Exception e)
 {
-JVMStabilityInspector.inspectThrowable(e);
 cql3Table = true;
 }
 if (hasColumn)
@@ -727,7 +725,7 

[jira] [Assigned] (CASSANDRA-8616) sstable2json may result in commit log segments be written

2015-01-14 Thread Yuki Morishita (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8616?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Yuki Morishita reassigned CASSANDRA-8616:
-

Assignee: Yuki Morishita  (was: Russ Hatch)

 sstable2json may result in commit log segments be written
 -

 Key: CASSANDRA-8616
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8616
 Project: Cassandra
  Issue Type: Bug
  Components: Tools
Reporter: Tyler Hobbs
Assignee: Yuki Morishita
 Fix For: 2.0.13


 There was a report of sstable2json causing commitlog segments to be written 
 out when run.  I haven't attempted to reproduce this yet, so that's all I 
 know for now.  Since sstable2json loads the conf and schema, I'm thinking 
 that it may inadvertently be triggering the commitlog code.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8616) sstable2json may result in commit log segments be written

2015-01-14 Thread Yuki Morishita (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8616?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277319#comment-14277319
 ] 

Yuki Morishita commented on CASSANDRA-8616:
---

yup

 sstable2json may result in commit log segments be written
 -

 Key: CASSANDRA-8616
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8616
 Project: Cassandra
  Issue Type: Bug
  Components: Tools
Reporter: Tyler Hobbs
Assignee: Yuki Morishita
 Fix For: 2.0.13


 There was a report of sstable2json causing commitlog segments to be written 
 out when run.  I haven't attempted to reproduce this yet, so that's all I 
 know for now.  Since sstable2json loads the conf and schema, I'm thinking 
 that it may inadvertently be triggering the commitlog code.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Created] (CASSANDRA-8621) For streaming operations, when a socket is closed/reset, we should retry/reinitiate that stream

2015-01-14 Thread Jeremy Hanna (JIRA)
Jeremy Hanna created CASSANDRA-8621:
---

 Summary: For streaming operations, when a socket is closed/reset, 
we should retry/reinitiate that stream
 Key: CASSANDRA-8621
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8621
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Jeremy Hanna


Currently we have a setting (streaming_socket_timeout_in_ms) that will timeout 
and retry the stream operation in the case where tcp is idle for a period of 
time.  However in the case where the socket is closed or reset, we do not retry 
the operation.  This can happen for a number of reasons, including when a 
firewall sends a reset message on a socket during a streaming operation - think 
nodetool rebuild necessarily across DCs or repairs.

Doing a retry would make the streaming operations more resilient.  It would be 
good to log the retry clearly as well (with the stream session ID).



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Assigned] (CASSANDRA-8621) For streaming operations, when a socket is closed/reset, we should retry/reinitiate that stream

2015-01-14 Thread Yuki Morishita (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8621?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Yuki Morishita reassigned CASSANDRA-8621:
-

Assignee: Yuki Morishita

 For streaming operations, when a socket is closed/reset, we should 
 retry/reinitiate that stream
 ---

 Key: CASSANDRA-8621
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8621
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Jeremy Hanna
Assignee: Yuki Morishita

 Currently we have a setting (streaming_socket_timeout_in_ms) that will 
 timeout and retry the stream operation in the case where tcp is idle for a 
 period of time.  However in the case where the socket is closed or reset, we 
 do not retry the operation.  This can happen for a number of reasons, 
 including when a firewall sends a reset message on a socket during a 
 streaming operation - think nodetool rebuild necessarily across DCs or 
 repairs.
 Doing a retry would make the streaming operations more resilient.  It would 
 be good to log the retry clearly as well (with the stream session ID).



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Updated] (CASSANDRA-8621) For streaming operations, when a socket is closed/reset, we should retry/reinitiate that stream

2015-01-14 Thread Jeremy Hanna (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8621?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Jeremy Hanna updated CASSANDRA-8621:

Description: 
Currently we have a setting (streaming_socket_timeout_in_ms) that will timeout 
and retry the stream operation in the case where tcp is idle for a period of 
time.  However in the case where the socket is closed or reset, we do not retry 
the operation.  This can happen for a number of reasons, including when a 
firewall sends a reset message on a socket during a streaming operation, such 
as nodetool rebuild necessarily across DCs or repairs.

Doing a retry would make the streaming operations more resilient.  It would be 
good to log the retry clearly as well (with the stream session ID).

  was:
Currently we have a setting (streaming_socket_timeout_in_ms) that will timeout 
and retry the stream operation in the case where tcp is idle for a period of 
time.  However in the case where the socket is closed or reset, we do not retry 
the operation.  This can happen for a number of reasons, including when a 
firewall sends a reset message on a socket during a streaming operation - think 
nodetool rebuild necessarily across DCs or repairs.

Doing a retry would make the streaming operations more resilient.  It would be 
good to log the retry clearly as well (with the stream session ID).


 For streaming operations, when a socket is closed/reset, we should 
 retry/reinitiate that stream
 ---

 Key: CASSANDRA-8621
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8621
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Jeremy Hanna
Assignee: Yuki Morishita

 Currently we have a setting (streaming_socket_timeout_in_ms) that will 
 timeout and retry the stream operation in the case where tcp is idle for a 
 period of time.  However in the case where the socket is closed or reset, we 
 do not retry the operation.  This can happen for a number of reasons, 
 including when a firewall sends a reset message on a socket during a 
 streaming operation, such as nodetool rebuild necessarily across DCs or 
 repairs.
 Doing a retry would make the streaming operations more resilient.  It would 
 be good to log the retry clearly as well (with the stream session ID).



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


cassandra git commit: Try to fix flapping SSTableRewriterTest

2015-01-14 Thread marcuse
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.1 25d4a13d5 -> 064144bd4


Try to fix flapping SSTableRewriterTest


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/064144bd
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/064144bd
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/064144bd

Branch: refs/heads/cassandra-2.1
Commit: 064144bd403e0327bf8e340ba54e857513078869
Parents: 25d4a13
Author: Marcus Eriksson marc...@apache.org
Authored: Wed Jan 14 18:08:02 2015 +0100
Committer: Marcus Eriksson marc...@apache.org
Committed: Wed Jan 14 18:08:27 2015 +0100

--
 test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java | 2 ++
 1 file changed, 2 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/064144bd/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
--
diff --git a/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java 
b/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
index 88bb8dc..fbd627b 100644
--- a/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
+++ b/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
@@ -174,6 +174,7 @@ public class SSTableRewriterTest extends SchemaLoader
 int filecounts = 
assertFileCounts(sstables.iterator().next().descriptor.directory.list(), 0, 0);
 assertEquals(1, filecounts);
 cfs.truncateBlocking();
+Thread.sleep(1000); // make sure the deletion tasks have run etc
 validateCFS(cfs);
 }
 
@@ -467,6 +468,7 @@ public class SSTableRewriterTest extends SchemaLoader
 Thread.sleep(1000);
 assertFileCounts(s.descriptor.directory.list(), 0, 0);
 cfs.truncateBlocking();
+Thread.sleep(1000); // make sure the deletion tasks have run etc
 validateCFS(cfs);
 }
 



[2/2] cassandra git commit: Merge branch 'cassandra-2.1' into trunk

2015-01-14 Thread marcuse
Merge branch 'cassandra-2.1' into trunk


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/30a5597c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/30a5597c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/30a5597c

Branch: refs/heads/trunk
Commit: 30a5597ca2eb5b13330cb5843a9ee036beac63de
Parents: 270a05d 064144b
Author: Marcus Eriksson marc...@apache.org
Authored: Wed Jan 14 18:08:51 2015 +0100
Committer: Marcus Eriksson marc...@apache.org
Committed: Wed Jan 14 18:08:51 2015 +0100

--
 test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java | 2 ++
 1 file changed, 2 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/30a5597c/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
--



[jira] [Commented] (CASSANDRA-8616) sstable2json may result in commit log segments be written

2015-01-14 Thread Tyler Hobbs (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8616?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277241#comment-14277241
 ] 

Tyler Hobbs commented on CASSANDRA-8616:


Thanks, [~rhatch].

[~yukim] do you want to take this one?

 sstable2json may result in commit log segments be written
 -

 Key: CASSANDRA-8616
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8616
 Project: Cassandra
  Issue Type: Bug
  Components: Tools
Reporter: Tyler Hobbs
Assignee: Russ Hatch
 Fix For: 2.0.13


 There was a report of sstable2json causing commitlog segments to be written 
 out when run.  I haven't attempted to reproduce this yet, so that's all I 
 know for now.  Since sstable2json loads the conf and schema, I'm thinking 
 that it may inadvertently be triggering the commitlog code.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


cassandra git commit: Fixup for #8558

2015-01-14 Thread slebresne
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.0 92f753957 -> e16d76dc8


Fixup for #8558


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e16d76dc
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e16d76dc
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e16d76dc

Branch: refs/heads/cassandra-2.0
Commit: e16d76dc84022a37dc544183732bfe9e9fb3fd94
Parents: 92f7539
Author: Sylvain Lebresne sylv...@datastax.com
Authored: Wed Jan 14 18:15:13 2015 +0100
Committer: Sylvain Lebresne sylv...@datastax.com
Committed: Wed Jan 14 18:15:13 2015 +0100

--
 .../db/columniterator/IndexedSliceReader.java   | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/e16d76dc/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
--
diff --git 
a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java 
b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
index 4409484..2216725 100644
--- a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
+++ b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
@@ -337,7 +337,7 @@ class IndexedSliceReader extends 
AbstractIterator<OnDiskAtom> implements OnDiskA
 boolean inSlice = false;
 
 OnDiskAtom prefetchedCol;
-while ((prefetchedCol = prefetched.peek() ) != null)
+while ((prefetchedCol = prefetched.peek()) != null)
 {
 // col is before slice, we update the slice
 if (isColumnBeforeSliceStart(prefetchedCol))
@@ -351,12 +351,15 @@ class IndexedSliceReader extends 
AbstractIterator<OnDiskAtom> implements OnDiskA
 continue;
 }
 
-// Otherwise, we either move to the next slice or, if 
we have none (which can happen
-// because we unwind prefetched no matter what due to 
RT), we skip the cell
-if (hasMoreSlice())
-setNextSlice();
-else
-prefetched.poll();
+// Otherwise, we either move to the next slice. If we 
have no more slice, then
+// simply unwind prefetched entirely and add all RT.
+if (!setNextSlice())
+{
+while ((prefetchedCol = prefetched.poll()) != null)
+if (prefetchedCol instanceof RangeTombstone)
+blockColumns.addLast(prefetchedCol);
+break;
+}
 
 }
 // col is within slice, all columns
@@ -373,8 +376,11 @@ class IndexedSliceReader extends 
AbstractIterator<OnDiskAtom> implements OnDiskA
 prefetched.poll();
 }
 }
+
 if (!blockColumns.isEmpty())
 return true;
+else if (!hasMoreSlice())
+return false;
 }
 try
 {



[1/2] cassandra git commit: Fixup for #8558

2015-01-14 Thread slebresne
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.1 064144bd4 -> 686b8104c


Fixup for #8558


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e16d76dc
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e16d76dc
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e16d76dc

Branch: refs/heads/cassandra-2.1
Commit: e16d76dc84022a37dc544183732bfe9e9fb3fd94
Parents: 92f7539
Author: Sylvain Lebresne sylv...@datastax.com
Authored: Wed Jan 14 18:15:13 2015 +0100
Committer: Sylvain Lebresne sylv...@datastax.com
Committed: Wed Jan 14 18:15:13 2015 +0100

--
 .../db/columniterator/IndexedSliceReader.java   | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/e16d76dc/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
--
diff --git 
a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java 
b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
index 4409484..2216725 100644
--- a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
+++ b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
@@ -337,7 +337,7 @@ class IndexedSliceReader extends 
AbstractIterator<OnDiskAtom> implements OnDiskA
 boolean inSlice = false;
 
 OnDiskAtom prefetchedCol;
-while ((prefetchedCol = prefetched.peek() ) != null)
+while ((prefetchedCol = prefetched.peek()) != null)
 {
 // col is before slice, we update the slice
 if (isColumnBeforeSliceStart(prefetchedCol))
@@ -351,12 +351,15 @@ class IndexedSliceReader extends 
AbstractIterator<OnDiskAtom> implements OnDiskA
 continue;
 }
 
-// Otherwise, we either move to the next slice or, if 
we have none (which can happen
-// because we unwind prefetched no matter what due to 
RT), we skip the cell
-if (hasMoreSlice())
-setNextSlice();
-else
-prefetched.poll();
+// Otherwise, we either move to the next slice. If we 
have no more slice, then
+// simply unwind prefetched entirely and add all RT.
+if (!setNextSlice())
+{
+while ((prefetchedCol = prefetched.poll()) != null)
+if (prefetchedCol instanceof RangeTombstone)
+blockColumns.addLast(prefetchedCol);
+break;
+}
 
 }
 // col is within slice, all columns
@@ -373,8 +376,11 @@ class IndexedSliceReader extends 
AbstractIterator<OnDiskAtom> implements OnDiskA
 prefetched.poll();
 }
 }
+
 if (!blockColumns.isEmpty())
 return true;
+else if (!hasMoreSlice())
+return false;
 }
 try
 {



cassandra git commit: Fix slice size calculation for 2ary index table slices

2015-01-14 Thread tylerhobbs
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.0 e16d76dc8 -> 90780b550


Fix slice size calculation for 2ary index table slices

Patch by Tyler Hobbs; reviewed by Benjamin Lerer for CASSANDRA-8550


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/90780b55
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/90780b55
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/90780b55

Branch: refs/heads/cassandra-2.0
Commit: 90780b550f39bc318567ac53f8e8e7d797697f16
Parents: e16d76d
Author: Tyler Hobbs ty...@datastax.com
Authored: Wed Jan 14 11:31:26 2015 -0600
Committer: Tyler Hobbs ty...@datastax.com
Committed: Wed Jan 14 11:32:45 2015 -0600

--
 CHANGES.txt  |  2 ++
 .../db/index/composites/CompositesSearcher.java  | 11 +--
 2 files changed, 7 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/90780b55/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 92bf422..45b2b9c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,6 @@
 2.0.12:
+ * Use more efficient slice size for querying internal secondary
+   index tables (CASSANDRA-8550)
  * Fix potentially returning deleted rows with range tombstone (CASSANDRA-8558)
  * Make sure we unmark compacting after scrub/cleanup etc (CASSANDRA-8548)
  * Check for available disk space before starting a compaction (CASSANDRA-8562)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/90780b55/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
--
diff --git 
a/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java 
b/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
index b978021..9370133 100644
--- a/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
+++ b/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
@@ -107,9 +107,8 @@ public class CompositesSearcher extends 
SecondaryIndexSearcher
 private int limit = filter.currentLimit();
 private int columnsCount = 0;
 
-private int meanColumns = 
Math.max(index.getIndexCfs().getMeanColumns(), 1);
-// We shouldn't fetch only 1 row as this provides buggy paging in 
case the first row doesn't satisfy all clauses
-private int rowsPerQuery = Math.max(Math.min(filter.maxRows(), 
filter.maxColumns() / meanColumns), 2);
+// We have to fetch at least two rows to avoid breaking paging if 
the first row doesn't satisfy all clauses
+private int indexCellsPerQuery = Math.max(2, 
Math.min(filter.maxColumns(), filter.maxRows()));
 
 public boolean needsFiltering()
 {
@@ -144,9 +143,9 @@ public class CompositesSearcher extends 
SecondaryIndexSearcher
 
 if (indexColumns == null || indexColumns.isEmpty())
 {
-if (columnsRead < rowsPerQuery)
+if (columnsRead < indexCellsPerQuery)
 {
-logger.trace("Read only {} (< {}) last page 
through, must be done", columnsRead, rowsPerQuery);
+logger.trace("Read only {} (< {}) last page 
through, must be done", columnsRead, indexCellsPerQuery);
 return makeReturn(currentKey, data);
 }
 
@@ -159,7 +158,7 @@ public class CompositesSearcher extends 
SecondaryIndexSearcher
  
lastSeenPrefix,
  
endPrefix,
  
false,
- 
rowsPerQuery,
+ 
indexCellsPerQuery,
  
filter.timestamp);
 ColumnFamily indexRow = 
index.getIndexCfs().getColumnFamily(indexFilter);
 if (indexRow == null || indexRow.getColumnCount() == 0)



[jira] [Commented] (CASSANDRA-8292) From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in variable: [cassandra.config]. Please prefix the file with file:/// for local fil

2015-01-14 Thread Brandon Williams (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8292?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277277#comment-14277277
 ] 

Brandon Williams commented on CASSANDRA-8292:
-

+1

 From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting 
 URI in variable: [cassandra.config].  Please prefix the file with file:/// 
 for local files or file://server/ for remote files.
 

 Key: CASSANDRA-8292
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8292
 Project: Cassandra
  Issue Type: Bug
Reporter: Brandon Kearby
Assignee: Joshua McKenzie
 Fix For: 2.1.3

 Attachments: 8292_v1.txt, 8292_v2.txt, patch.txt


 Getting this error from Pig:
 Looks like the client side hadoop code is trying to locate the cassandra.yaml.
 {code}
 ERROR org.apache.cassandra.config.DatabaseDescriptor - Fatal configuration 
 error
 org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in 
 variable: [cassandra.config].  Please prefix the file with file:/// for local 
 files or file://server/ for remote files.  Aborting.
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.getStorageConfigURL(YamlConfigurationLoader.java:73)
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.loadConfig(YamlConfigurationLoader.java:84)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.loadConfig(DatabaseDescriptor.java:158)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.&lt;clinit&gt;(DatabaseDescriptor.java:133)
   at 
 org.apache.cassandra.utils.JVMStabilityInspector.inspectThrowable(JVMStabilityInspector.java:54)
   at 
 org.apache.cassandra.hadoop.HadoopCompat.&lt;clinit&gt;(HadoopCompat.java:135)
   at 
 org.apache.cassandra.hadoop.AbstractColumnFamilyInputFormat.getSplits(AbstractColumnFamilyInputFormat.java:120)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.getSplits(PigInputFormat.java:273)
   at 
 org.apache.hadoop.mapred.JobClient.writeNewSplits(JobClient.java:1014)
   at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1031)
   at org.apache.hadoop.mapred.JobClient.access$600(JobClient.java:172)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:943)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:896)
   at java.security.AccessController.doPrivileged(Native Method)
   at javax.security.auth.Subject.doAs(Subject.java:422)
   at 
 org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
   at 
 org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:896)
   at org.apache.hadoop.mapreduce.Job.submit(Job.java:531)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.submit(ControlledJob.java:318)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.startReadyJobs(JobControl.java:238)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.run(JobControl.java:269)
   at java.lang.Thread.run(Thread.java:745)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher$1.run(MapReduceLauncher.java:260)
 Expecting URI in variable: [cassandra.config].  Please prefix the file with 
 file:/// for local files or file://server/ for remote files.  Aborting.
 Fatal configuration error; unable to start. See log for stacktrace.
 {code}
 Sample Pig Script:
 {code}
 grunt> sigs = load 'cql://socialdata/signal' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt> a = limit sigs 5;  
 
 grunt> dump a;
 {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Ariel Weisberg (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277276#comment-14277276
 ] 

Ariel Weisberg commented on CASSANDRA-8614:
---

I am trying to disable the instruction so I can get the fallback and measure. I 
read in some places that Adler32 was also supposed to get accelerated, but I 
only see stuff for CRC32. Doesn't mean it doesn't do it on the sly, but we 
might get the speedup even using Adler32.

The JVM is accepting -XX:-UseCLMUL -XX:-UseCRC32Intrinsic, but it doesn't seem 
to work. I keep getting 13 gigabytes/second. Something doesn't add up. The 
speed up going from 128-bytes to 1 megabyte is also suspicious.

I also got 13 gigabytes/sec out of JDK 7. ??!!!?? I'm definitely not measuring 
what I think I'm measuring.

 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8614
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Ariel Weisberg
Assignee: Benedict
  Labels: performance
 Attachments: 8614.patch, Sample.java


 JDK 8 has support for an intrinsic for CRC32 that runs at 12-13 gigabytes/sec 
 per core in my quick and dirty test. PureJavaCRC32 is < 800 megabytes/sec if 
 I recall and it has a lookup table that evicts random cache lines every time 
 it runs.
 In order to capture the benefit of that when it is available we can select a 
 CRC32 implementation at startup in a static block.
 If JDK 8 is not what is running we can fall back to the existing 
 PureJavaCRC32 implementation.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-8614) Select optimal CRC32 implementation at runtime

2015-01-14 Thread Robert Stupp (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8614?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277347#comment-14277347
 ] 

Robert Stupp commented on CASSANDRA-8614:
-

The intrinsic is definitely not present in Java 7. zlib used in JDK8 is quite 
old (2010) - and support for Intel-CPU-CRC has been added to [zlib mid 2014 
|https://github.com/jtkukunas/zlib/blob/master/crc_folding.c]. Maybe a newer 
zlib via LD_PRELOAD?

 Select optimal CRC32 implementation at runtime
 --

 Key: CASSANDRA-8614
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8614
 Project: Cassandra
  Issue Type: Improvement
  Components: Core
Reporter: Ariel Weisberg
Assignee: Benedict
  Labels: performance
 Attachments: 8614.patch, Sample.java


 JDK 8 has support for an intrinsic for CRC32 that runs at 12-13 gigabytes/sec 
 per core in my quick and dirty test. PureJavaCRC32 is < 800 megabytes/sec if 
 I recall and it has a lookup table that evicts random cache lines every time 
 it runs.
 In order to capture the benefit of that when it is available we can select a 
 CRC32 implementation at startup in a static block.
 If JDK 8 is not what is running we can fall back to the existing 
 PureJavaCRC32 implementation.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[1/2] cassandra git commit: Try to fix flapping SSTableRewriterTest

2015-01-14 Thread marcuse
Repository: cassandra
Updated Branches:
  refs/heads/trunk 270a05d63 -> 30a5597ca


Try to fix flapping SSTableRewriterTest


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/064144bd
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/064144bd
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/064144bd

Branch: refs/heads/trunk
Commit: 064144bd403e0327bf8e340ba54e857513078869
Parents: 25d4a13
Author: Marcus Eriksson marc...@apache.org
Authored: Wed Jan 14 18:08:02 2015 +0100
Committer: Marcus Eriksson marc...@apache.org
Committed: Wed Jan 14 18:08:27 2015 +0100

--
 test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java | 2 ++
 1 file changed, 2 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/064144bd/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
--
diff --git a/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java 
b/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
index 88bb8dc..fbd627b 100644
--- a/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
+++ b/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
@@ -174,6 +174,7 @@ public class SSTableRewriterTest extends SchemaLoader
 int filecounts = 
assertFileCounts(sstables.iterator().next().descriptor.directory.list(), 0, 0);
 assertEquals(1, filecounts);
 cfs.truncateBlocking();
+Thread.sleep(1000); // make sure the deletion tasks have run etc
 validateCFS(cfs);
 }
 
@@ -467,6 +468,7 @@ public class SSTableRewriterTest extends SchemaLoader
 Thread.sleep(1000);
 assertFileCounts(s.descriptor.directory.list(), 0, 0);
 cfs.truncateBlocking();
+Thread.sleep(1000); // make sure the deletion tasks have run etc
 validateCFS(cfs);
 }
 



[2/2] cassandra git commit: Merge branch 'cassandra-2.0' into cassandra-2.1

2015-01-14 Thread slebresne
Merge branch 'cassandra-2.0' into cassandra-2.1


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/686b8104
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/686b8104
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/686b8104

Branch: refs/heads/cassandra-2.1
Commit: 686b8104ca7469aee18b9ad4fa8b181e84dca427
Parents: 064144b e16d76d
Author: Sylvain Lebresne sylv...@datastax.com
Authored: Wed Jan 14 18:15:36 2015 +0100
Committer: Sylvain Lebresne sylv...@datastax.com
Committed: Wed Jan 14 18:15:36 2015 +0100

--
 .../db/columniterator/IndexedSliceReader.java   | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/686b8104/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
--



[jira] [Updated] (CASSANDRA-8292) From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in variable: [cassandra.config]. Please prefix the file with file:/// for local files

2015-01-14 Thread Joshua McKenzie (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8292?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Joshua McKenzie updated CASSANDRA-8292:
---
Attachment: 8292_v2.txt

The removal of JVMStabilityInspector looks good.  I've attached a v2 that's 
modified to remove the extra imports that were also added on your patch.txt.

I should have thought of this when we put in the original fix - sorry about 
that!

 From Pig: org.apache.cassandra.exceptions.ConfigurationException: Expecting 
 URI in variable: [cassandra.config].  Please prefix the file with file:/// 
 for local files or file://server/ for remote files.
 

 Key: CASSANDRA-8292
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8292
 Project: Cassandra
  Issue Type: Bug
Reporter: Brandon Kearby
Assignee: Joshua McKenzie
 Fix For: 2.1.3

 Attachments: 8292_v1.txt, 8292_v2.txt, patch.txt


 Getting this error from Pig:
 Looks like the client side hadoop code is trying to locate the cassandra.yaml.
 {code}
 ERROR org.apache.cassandra.config.DatabaseDescriptor - Fatal configuration 
 error
 org.apache.cassandra.exceptions.ConfigurationException: Expecting URI in 
 variable: [cassandra.config].  Please prefix the file with file:/// for local 
 files or file://server/ for remote files.  Aborting.
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.getStorageConfigURL(YamlConfigurationLoader.java:73)
   at 
 org.apache.cassandra.config.YamlConfigurationLoader.loadConfig(YamlConfigurationLoader.java:84)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.loadConfig(DatabaseDescriptor.java:158)
   at 
 org.apache.cassandra.config.DatabaseDescriptor.&lt;clinit&gt;(DatabaseDescriptor.java:133)
   at 
 org.apache.cassandra.utils.JVMStabilityInspector.inspectThrowable(JVMStabilityInspector.java:54)
   at 
 org.apache.cassandra.hadoop.HadoopCompat.&lt;clinit&gt;(HadoopCompat.java:135)
   at 
 org.apache.cassandra.hadoop.AbstractColumnFamilyInputFormat.getSplits(AbstractColumnFamilyInputFormat.java:120)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.getSplits(PigInputFormat.java:273)
   at 
 org.apache.hadoop.mapred.JobClient.writeNewSplits(JobClient.java:1014)
   at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1031)
   at org.apache.hadoop.mapred.JobClient.access$600(JobClient.java:172)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:943)
   at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:896)
   at java.security.AccessController.doPrivileged(Native Method)
   at javax.security.auth.Subject.doAs(Subject.java:422)
   at 
 org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
   at 
 org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:896)
   at org.apache.hadoop.mapreduce.Job.submit(Job.java:531)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.submit(ControlledJob.java:318)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.startReadyJobs(JobControl.java:238)
   at 
 org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.run(JobControl.java:269)
   at java.lang.Thread.run(Thread.java:745)
   at 
 org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher$1.run(MapReduceLauncher.java:260)
 Expecting URI in variable: [cassandra.config].  Please prefix the file with 
 file:/// for local files or file://server/ for remote files.  Aborting.
 Fatal configuration error; unable to start. See log for stacktrace.
 {code}
 Sample Pig Script:
 {code}
 grunt> sigs = load 'cql://socialdata/signal' using 
 org.apache.cassandra.hadoop.pig.CqlNativeStorage();
 grunt> a = limit sigs 5;  
 
 grunt> dump a;
 {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


cassandra git commit: Fix SSTableRewriterTest on Windows

2015-01-14 Thread jmckenzie
Repository: cassandra
Updated Branches:
  refs/heads/trunk 88b51539f -> 2fc9f3292


Fix SSTableRewriterTest on Windows

Patch by jmckenzie; reviewed by marcuse for CASSANDRA-8598


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/2fc9f329
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/2fc9f329
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/2fc9f329

Branch: refs/heads/trunk
Commit: 2fc9f3292a7e7164662ad50617fe76a21e697395
Parents: 88b5153
Author: Joshua McKenzie jmcken...@apache.org
Authored: Wed Jan 14 11:34:58 2015 -0600
Committer: Joshua McKenzie jmcken...@apache.org
Committed: Wed Jan 14 11:37:23 2015 -0600

--
 .../cassandra/io/sstable/SSTableRewriterTest.java   | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/2fc9f329/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
--
diff --git a/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java 
b/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
index afad979..3703d54 100644
--- a/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
+++ b/test/unit/org/apache/cassandra/io/sstable/SSTableRewriterTest.java
@@ -214,21 +214,27 @@ public class SSTableRewriterTest extends SchemaLoader
 for (int i = 0; i  100; i++)
 cf.addColumn(Util.cellname(i), ByteBuffer.allocate(1000), 1);
 File dir = cfs.directories.getDirectoryForNewSSTables();
+
 SSTableWriter writer = getWriter(cfs, dir);
 for (int i = 0; i  500; i++)
 
writer.append(StorageService.getPartitioner().decorateKey(ByteBufferUtil.bytes(i)),
 cf);
 SSTableReader s = writer.openEarly(1000);
 assertFileCounts(dir.list(), 2, 3);
+
 for (int i = 500; i  1000; i++)
 
writer.append(StorageService.getPartitioner().decorateKey(ByteBufferUtil.bytes(i)),
 cf);
 SSTableReader s2 = writer.openEarly(1000);
+
 assertTrue(s != s2);
 assertFileCounts(dir.list(), 2, 3);
-s.markObsolete();
+
+s.setReplacedBy(s2);
+s2.markObsolete();
 s.releaseReference();
-Thread.sleep(1000);
-assertFileCounts(dir.list(), 0, 3);
+s2.releaseReference();
+
 writer.abort(false);
+
 Thread.sleep(1000);
 int datafiles = assertFileCounts(dir.list(), 0, 0);
 assertEquals(datafiles, 0);



[jira] [Created] (CASSANDRA-8620) Bootstrap session hanging indefinitely

2015-01-14 Thread Adam Horwich (JIRA)
Adam Horwich created CASSANDRA-8620:
---

 Summary: Bootstrap session hanging indefinitely
 Key: CASSANDRA-8620
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8620
 Project: Cassandra
  Issue Type: Bug
 Environment: Debian 7, Oracle JDK 1.7.0_51, AWS + GCE
Reporter: Adam Horwich


Hi! We have been running a relatively small 2.1.2 cluster over 2 DCs for a few 
months with ~100GB load per node and a RF=3 and over the last few weeks have 
been trying to scale up capacity. 

We've been recently seeing scenarios in which the Bootstrap or Unbootstrap 
streaming process hangs indefinitely for one or more sessions on the receiver 
without stacktrace or exception. This does not happen every time, and we do not 
get into this state with the same sender every time. When the receiver is in a 
hung state, the following can be found in the thread dump:

The Stream-IN thread for one or more sessions is blocked in the following 
state: 
Thread 24942: (state = BLOCKED)
 - sun.misc.Unsafe.park(boolean, long) @bci=0 (Compiled frame; information may 
be imprecise)
 - java.util.concurrent.locks.LockSupport.park(java.lang.Object) @bci=14, 
line=186 (Compiled frame)
 - 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await() 
@bci=42, line=2043 (Compiled frame)
 - java.util.concurrent.ArrayBlockingQueue.take() @bci=20, line=374 (Compiled 
frame)
 - org.apache.cassandra.streaming.compress.CompressedInputStream.read() 
@bci=31, line=89 (Compiled frame)
 - java.io.DataInputStream.readUnsignedShort() @bci=4, line=337 (Compiled frame)
 - org.apache.cassandra.utils.BytesReadTracker.readUnsignedShort() @bci=4, 
line=140 (Compiled frame)
 - org.apache.cassandra.utils.ByteBufferUtil.readShortLength(java.io.DataInput) 
@bci=1, line=317 (Compiled frame)
 - 
org.apache.cassandra.utils.ByteBufferUtil.readWithShortLength(java.io.DataInput)
 @bci=2, line=327 (Compiled frame)
 - 
org.apache.cassandra.db.composites.AbstractCType$Serializer.deserialize(java.io.DataInput)
 @bci=5, line=397 (Compiled frame)
 - 
org.apache.cassandra.db.composites.AbstractCType$Serializer.deserialize(java.io.DataInput)
 @bci=2, line=381 (Compiled frame)
 - 
org.apache.cassandra.db.OnDiskAtom$Serializer.deserializeFromSSTable(java.io.DataInput,
 org.apache.cassandra.db.ColumnSerializer$Flag, int, 
org.apache.cassandra.io.sstable.Descriptor$Version) @bci=10, line=75 (Compiled 
frame)
 - org.apache.cassandra.db.AbstractCell$1.computeNext() @bci=25, line=52 
(Compiled frame)
 - org.apache.cassandra.db.AbstractCell$1.computeNext() @bci=1, line=46 
(Compiled frame)
 - com.google.common.collect.AbstractIterator.tryToComputeNext() @bci=9, 
line=143 (Compiled frame)
 - com.google.common.collect.AbstractIterator.hasNext() @bci=61, line=138 
(Compiled frame)
 - 
org.apache.cassandra.io.sstable.SSTableWriter.appendFromStream(org.apache.cassandra.db.DecoratedKey,
 org.apache.cassandra.config.CFMetaData, java.io.DataInput, 
org.apache.cassandra.io.sstable.Descriptor$Version) @bci=320, line=283 
(Compiled frame)
 - 
org.apache.cassandra.streaming.StreamReader.writeRow(org.apache.cassandra.io.sstable.SSTableWriter,
 java.io.DataInput, org.apache.cassandra.db.ColumnFamilyStore) @bci=26, 
line=157 (Compiled frame)
 - 
org.apache.cassandra.streaming.compress.CompressedStreamReader.read(java.nio.channels.ReadableByteChannel)
 @bci=258, line=89 (Compiled frame)
 - 
org.apache.cassandra.streaming.messages.IncomingFileMessage$1.deserialize(java.nio.channels.ReadableByteChannel,
 int, org.apache.cassandra.streaming.StreamSession) @bci=69, line=48 
(Interpreted frame)
 - 
org.apache.cassandra.streaming.messages.IncomingFileMessage$1.deserialize(java.nio.channels.ReadableByteChannel,
 int, org.apache.cassandra.streaming.StreamSession) @bci=4, line=38 
(Interpreted frame)
 - 
org.apache.cassandra.streaming.messages.StreamMessage.deserialize(java.nio.channels.ReadableByteChannel,
 int, org.apache.cassandra.streaming.StreamSession) @bci=37, line=55 
(Interpreted frame)
 - 
org.apache.cassandra.streaming.ConnectionHandler$IncomingMessageHandler.run() 
@bci=24, line=245 (Interpreted frame)
 - java.lang.Thread.run() @bci=11, line=744 (Interpreted frame)

Debug logging shows that the receiver is still reading the file it is receiving 
from the sender and has not yet sent an ACK.

The receiver is waiting for more data to finish writing its row, and the sender 
is not sending any more data. On both the receiver and sender there is a large 
amount of data (~5MB) stuck in the Recv-Q (receiver) and Send-Q (sender). 

We've been trying to diagnose this issue internally, but it's difficult to 
create a reliably reproducible scenario. We have so far found that restarting 
all nodes in the cluster and ensuring that a cleanup has been performed helps 
mitigate the problem (though a cleanup without restart can still result in a 
hung state). However, it's 

[jira] [Commented] (CASSANDRA-8598) Windows - SSTableRewriterTest fails on trunk

2015-01-14 Thread Marcus Eriksson (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8598?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14277244#comment-14277244
 ] 

Marcus Eriksson commented on CASSANDRA-8598:


+1

 Windows - SSTableRewriterTest fails on trunk
 

 Key: CASSANDRA-8598
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8598
 Project: Cassandra
  Issue Type: Bug
Reporter: Joshua McKenzie
Assignee: Joshua McKenzie
Priority: Minor
  Labels: Windows
 Fix For: 3.0

 Attachments: 8598_v1.txt


 Right at the top of the test, we see:
 {noformat}
 [junit] ERROR 18:15:05 Unable to delete 
 build\test\cassandra\data;0\SSTableRewriterTest\Standard1-e63f49c09a8611e4bebb8ff5e6ab1035\tmplink-la-27-big-Data.db
  (it will be removed on server restart; we'll also retry after GC)
 [junit] ERROR 18:15:05 Unable to delete 
 build\test\cassandra\data;0\SSTableRewriterTest\Standard1-e63f49c09a8611e4bebb8ff5e6ab1035\tmplink-la-27-big-Data.db
  (it will be removed on server restart; we'll also retry after GC)
 [junit] -  ---
 [junit] Testcase: 
 testFileRemoval(org.apache.cassandra.io.sstable.SSTableRewriterTest): 
 FAILED
  [junit] expected:<0> but was:<2>
  [junit] junit.framework.AssertionFailedError: expected:<0> but was:<2>
 [junit] at 
 org.apache.cassandra.io.sstable.SSTableRewriterTest.assertFileCounts(SSTableRewriterTest.java:758)
 [junit] at 
 org.apache.cassandra.io.sstable.SSTableRewriterTest.testFileRemoval(SSTableRewriterTest.java:229)
 {noformat}
 The rest cascade after that.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[3/3] cassandra git commit: Merge branch 'cassandra-2.1' into trunk

2015-01-14 Thread slebresne
Merge branch 'cassandra-2.1' into trunk


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/4937451f
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/4937451f
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/4937451f

Branch: refs/heads/trunk
Commit: 4937451f55e07db3d731fb7ca6f7379978d55a47
Parents: 30a5597 686b810
Author: Sylvain Lebresne sylv...@datastax.com
Authored: Wed Jan 14 18:18:00 2015 +0100
Committer: Sylvain Lebresne sylv...@datastax.com
Committed: Wed Jan 14 18:18:00 2015 +0100

--
 .../sstable/format/big/IndexedSliceReader.java  | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/4937451f/src/java/org/apache/cassandra/io/sstable/format/big/IndexedSliceReader.java
--
diff --cc 
src/java/org/apache/cassandra/io/sstable/format/big/IndexedSliceReader.java
index 45e30d4,000..6db9c3d
mode 100644,00..100644
--- 
a/src/java/org/apache/cassandra/io/sstable/format/big/IndexedSliceReader.java
+++ 
b/src/java/org/apache/cassandra/io/sstable/format/big/IndexedSliceReader.java
@@@ -1,536 -1,0 +1,542 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * License); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + * http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an AS IS BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.cassandra.io.sstable.format.big;
 +
 +import java.io.IOException;
 +import java.util.ArrayDeque;
 +import java.util.Deque;
 +import java.util.List;
 +
 +import com.google.common.collect.AbstractIterator;
 +
 +import org.apache.cassandra.db.*;
 +import org.apache.cassandra.db.columniterator.OnDiskAtomIterator;
 +import org.apache.cassandra.db.composites.CellNameType;
 +import org.apache.cassandra.db.composites.Composite;
 +import org.apache.cassandra.db.filter.ColumnSlice;
 +import org.apache.cassandra.io.sstable.CorruptSSTableException;
 +import org.apache.cassandra.io.sstable.IndexHelper;
 +import org.apache.cassandra.io.sstable.IndexHelper.IndexInfo;
 +import org.apache.cassandra.io.sstable.format.SSTableReader;
 +import org.apache.cassandra.io.util.FileDataInput;
 +import org.apache.cassandra.io.util.FileMark;
 +import org.apache.cassandra.tracing.Tracing;
 +import org.apache.cassandra.utils.ByteBufferUtil;
 +
 +/**
 + * This is a reader that finds the block for a starting column and returns 
blocks before/after it for each next call.
 + * This function assumes that the CF is sorted by name and exploits the name 
index.
 + */
 +class IndexedSliceReader extends AbstractIteratorOnDiskAtom implements 
OnDiskAtomIterator
 +{
 +private final ColumnFamily emptyColumnFamily;
 +
 +private final SSTableReader sstable;
 +private final ListIndexHelper.IndexInfo indexes;
 +private final FileDataInput originalInput;
 +private FileDataInput file;
 +private final boolean reversed;
 +private final ColumnSlice[] slices;
 +private final BlockFetcher fetcher;
 +private final DequeOnDiskAtom blockColumns = new 
ArrayDequeOnDiskAtom();
 +private final CellNameType comparator;
 +
 +// Holds range tombstone in reverse queries. See addColumn()
 +private final DequeOnDiskAtom rangeTombstonesReversed;
 +
 +/**
 + * This slice reader assumes that slices are sorted correctly, e.g. that 
for forward lookup slices are in
 + * lexicographic order of start elements and that for reverse lookup they 
are in reverse lexicographic order of
 + * finish (reverse start) elements. i.e. forward: [a,b],[d,e],[g,h] 
reverse: [h,g],[e,d],[b,a]. This reader also
 + * assumes that validation has been performed in terms of intervals (no 
overlapping intervals).
 + */
 +IndexedSliceReader(SSTableReader sstable, RowIndexEntry indexEntry, 
FileDataInput input, ColumnSlice[] slices, boolean reversed)
 +{
 +Tracing.trace(Seeking to partition indexed section in data file);
 +this.sstable = sstable;
 +this.originalInput = input;
 +this.reversed = reversed;
 +this.slices = slices;
 +this.comparator = 

[1/3] cassandra git commit: Fixup for #8558

2015-01-14 Thread slebresne
Repository: cassandra
Updated Branches:
  refs/heads/trunk 30a5597ca -> 4937451f5


Fixup for #8558


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e16d76dc
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e16d76dc
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e16d76dc

Branch: refs/heads/trunk
Commit: e16d76dc84022a37dc544183732bfe9e9fb3fd94
Parents: 92f7539
Author: Sylvain Lebresne sylv...@datastax.com
Authored: Wed Jan 14 18:15:13 2015 +0100
Committer: Sylvain Lebresne sylv...@datastax.com
Committed: Wed Jan 14 18:15:13 2015 +0100

--
 .../db/columniterator/IndexedSliceReader.java   | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/e16d76dc/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
--
diff --git 
a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java 
b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
index 4409484..2216725 100644
--- a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
+++ b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
@@ -337,7 +337,7 @@ class IndexedSliceReader extends 
AbstractIteratorOnDiskAtom implements OnDiskA
 boolean inSlice = false;
 
 OnDiskAtom prefetchedCol;
-while ((prefetchedCol = prefetched.peek() ) != null)
+while ((prefetchedCol = prefetched.peek()) != null)
 {
 // col is before slice, we update the slice
 if (isColumnBeforeSliceStart(prefetchedCol))
@@ -351,12 +351,15 @@ class IndexedSliceReader extends 
AbstractIteratorOnDiskAtom implements OnDiskA
 continue;
 }
 
-// Otherwise, we either move to the next slice or, if 
we have none (which can happen
-// because we unwind prefetched no matter what due to 
RT), we skip the cell
-if (hasMoreSlice())
-setNextSlice();
-else
-prefetched.poll();
+// Otherwise, we either move to the next slice. If we 
have no more slice, then
+// simply unwind prefetched entirely and add all RT.
+if (!setNextSlice())
+{
+while ((prefetchedCol = prefetched.poll()) != null)
+if (prefetchedCol instanceof RangeTombstone)
+blockColumns.addLast(prefetchedCol);
+break;
+}
 
 }
 // col is within slice, all columns
@@ -373,8 +376,11 @@ class IndexedSliceReader extends 
AbstractIteratorOnDiskAtom implements OnDiskA
 prefetched.poll();
 }
 }
+
 if (!blockColumns.isEmpty())
 return true;
+else if (!hasMoreSlice())
+return false;
 }
 try
 {



[2/3] cassandra git commit: Merge branch 'cassandra-2.0' into cassandra-2.1

2015-01-14 Thread slebresne
Merge branch 'cassandra-2.0' into cassandra-2.1


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/686b8104
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/686b8104
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/686b8104

Branch: refs/heads/trunk
Commit: 686b8104ca7469aee18b9ad4fa8b181e84dca427
Parents: 064144b e16d76d
Author: Sylvain Lebresne sylv...@datastax.com
Authored: Wed Jan 14 18:15:36 2015 +0100
Committer: Sylvain Lebresne sylv...@datastax.com
Committed: Wed Jan 14 18:15:36 2015 +0100

--
 .../db/columniterator/IndexedSliceReader.java   | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/686b8104/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
--



[1/3] cassandra git commit: Fix slice size calculation for 2ary index table slices

2015-01-14 Thread tylerhobbs
Repository: cassandra
Updated Branches:
  refs/heads/trunk 4937451f5 -> 88b51539f


Fix slice size calculation for 2ary index table slices

Patch by Tyler Hobbs; reviewed by Benjamin Lerer for CASSANDRA-8550


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/90780b55
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/90780b55
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/90780b55

Branch: refs/heads/trunk
Commit: 90780b550f39bc318567ac53f8e8e7d797697f16
Parents: e16d76d
Author: Tyler Hobbs ty...@datastax.com
Authored: Wed Jan 14 11:31:26 2015 -0600
Committer: Tyler Hobbs ty...@datastax.com
Committed: Wed Jan 14 11:32:45 2015 -0600

--
 CHANGES.txt  |  2 ++
 .../db/index/composites/CompositesSearcher.java  | 11 +--
 2 files changed, 7 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/90780b55/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 92bf422..45b2b9c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,6 @@
 2.0.12:
+ * Use more efficient slice size for querying internal secondary
+   index tables (CASSANDRA-8550)
  * Fix potentially returning deleted rows with range tombstone (CASSANDRA-8558)
  * Make sure we unmark compacting after scrub/cleanup etc (CASSANDRA-8548)
  * Check for available disk space before starting a compaction (CASSANDRA-8562)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/90780b55/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
--
diff --git 
a/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java 
b/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
index b978021..9370133 100644
--- a/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
+++ b/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
@@ -107,9 +107,8 @@ public class CompositesSearcher extends 
SecondaryIndexSearcher
 private int limit = filter.currentLimit();
 private int columnsCount = 0;
 
-private int meanColumns = 
Math.max(index.getIndexCfs().getMeanColumns(), 1);
-// We shouldn't fetch only 1 row as this provides buggy paging in 
case the first row doesn't satisfy all clauses
-private int rowsPerQuery = Math.max(Math.min(filter.maxRows(), 
filter.maxColumns() / meanColumns), 2);
+// We have to fetch at least two rows to avoid breaking paging if 
the first row doesn't satisfy all clauses
+private int indexCellsPerQuery = Math.max(2, 
Math.min(filter.maxColumns(), filter.maxRows()));
 
 public boolean needsFiltering()
 {
@@ -144,9 +143,9 @@ public class CompositesSearcher extends 
SecondaryIndexSearcher
 
 if (indexColumns == null || indexColumns.isEmpty())
 {
-if (columnsRead < rowsPerQuery)
+if (columnsRead < indexCellsPerQuery)
 {
-logger.trace("Read only {} (< {}) last page 
through, must be done", columnsRead, rowsPerQuery);
+logger.trace("Read only {} (< {}) last page 
through, must be done", columnsRead, indexCellsPerQuery);
 return makeReturn(currentKey, data);
 }
 
@@ -159,7 +158,7 @@ public class CompositesSearcher extends 
SecondaryIndexSearcher
  
lastSeenPrefix,
  
endPrefix,
  
false,
- 
rowsPerQuery,
+ 
indexCellsPerQuery,
  
filter.timestamp);
 ColumnFamily indexRow = 
index.getIndexCfs().getColumnFamily(indexFilter);
 if (indexRow == null || indexRow.getColumnCount() == 0)



[2/3] cassandra git commit: Merge branch 'cassandra-2.0' into cassandra-2.1

2015-01-14 Thread tylerhobbs
Merge branch 'cassandra-2.0' into cassandra-2.1

Conflicts:
CHANGES.txt


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/7804a53c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/7804a53c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/7804a53c

Branch: refs/heads/trunk
Commit: 7804a53c90b24820570cbc5c82043517489abbe5
Parents: 686b810 90780b5
Author: Tyler Hobbs ty...@datastax.com
Authored: Wed Jan 14 11:34:28 2015 -0600
Committer: Tyler Hobbs ty...@datastax.com
Committed: Wed Jan 14 11:34:28 2015 -0600

--
 CHANGES.txt  |  2 ++
 .../db/index/composites/CompositesSearcher.java  | 11 +--
 2 files changed, 7 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/7804a53c/CHANGES.txt
--
diff --cc CHANGES.txt
index 8f312a7,45b2b9c..d2802ad
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,59 -1,8 +1,61 @@@
 -2.0.12:
 +2.1.3
 + * Fix NPE when passing wrong argument in ALTER TABLE statement 
(CASSANDRA-8355)
 + * Pig: Refactor and deprecate CqlStorage (CASSANDRA-8599)
 + * Don't reuse the same cleanup strategy for all sstables (CASSANDRA-8537)
 + * Fix case-sensitivity of index name on CREATE and DROP INDEX
 +   statements (CASSANDRA-8365)
 + * Better detection/logging for corruption in compressed sstables 
(CASSANDRA-8192)
 + * Use the correct repairedAt value when closing writer (CASSANDRA-8570)
 + * (cqlsh) Handle a schema mismatch being detected on startup (CASSANDRA-8512)
 + * Properly calculate expected write size during compaction (CASSANDRA-8532)
 + * Invalidate affected prepared statements when a table's columns
 +   are altered (CASSANDRA-7910)
 + * Stress - user defined writes should populate sequentally (CASSANDRA-8524)
 + * Fix regression in SSTableRewriter causing some rows to become unreadable 
 +   during compaction (CASSANDRA-8429)
 + * Run major compactions for repaired/unrepaired in parallel (CASSANDRA-8510)
 + * (cqlsh) Fix compression options in DESCRIBE TABLE output when compression
 +   is disabled (CASSANDRA-8288)
 + * (cqlsh) Fix DESCRIBE output after keyspaces are altered (CASSANDRA-7623)
 + * Make sure we set lastCompactedKey correctly (CASSANDRA-8463)
 + * (cqlsh) Fix output of CONSISTENCY command (CASSANDRA-8507)
 + * (cqlsh) Fixed the handling of LIST statements (CASSANDRA-8370)
 + * Make sstablescrub check leveled manifest again (CASSANDRA-8432)
 + * Check first/last keys in sstable when giving out positions (CASSANDRA-8458)
 + * Disable mmap on Windows (CASSANDRA-6993)
 + * Add missing ConsistencyLevels to cassandra-stress (CASSANDRA-8253)
 + * Add auth support to cassandra-stress (CASSANDRA-7985)
 + * Fix ArrayIndexOutOfBoundsException when generating error message
 +   for some CQL syntax errors (CASSANDRA-8455)
 + * Scale memtable slab allocation logarithmically (CASSANDRA-7882)
 + * cassandra-stress simultaneous inserts over same seed (CASSANDRA-7964)
 + * Reduce cassandra-stress sampling memory requirements (CASSANDRA-7926)
 + * Ensure memtable flush cannot expire commit log entries from its future 
(CASSANDRA-8383)
 + * Make read defrag async to reclaim memtables (CASSANDRA-8459)
 + * Remove tmplink files for offline compactions (CASSANDRA-8321)
 + * Reduce maxHintsInProgress (CASSANDRA-8415)
 + * BTree updates may call provided update function twice (CASSANDRA-8018)
 + * Release sstable references after anticompaction (CASSANDRA-8386)
 + * Handle abort() in SSTableRewriter properly (CASSANDRA-8320)
 + * Fix high size calculations for prepared statements (CASSANDRA-8231)
 + * Centralize shared executors (CASSANDRA-8055)
 + * Fix filtering for CONTAINS (KEY) relations on frozen collection
 +   clustering columns when the query is restricted to a single
 +   partition (CASSANDRA-8203)
 + * Do more aggressive entire-sstable TTL expiry checks (CASSANDRA-8243)
 + * Add more log info if readMeter is null (CASSANDRA-8238)
 + * add check of the system wall clock time at startup (CASSANDRA-8305)
 + * Support for frozen collections (CASSANDRA-7859)
 + * Fix overflow on histogram computation (CASSANDRA-8028)
 + * Have paxos reuse the timestamp generation of normal queries 
(CASSANDRA-7801)
 + * Fix incremental repair not remove parent session on remote (CASSANDRA-8291)
 + * Improve JBOD disk utilization (CASSANDRA-7386)
 + * Log failed host when preparing incremental repair (CASSANDRA-8228)
 + * Force config client mode in CQLSSTableWriter (CASSANDRA-8281)
 +Merged from 2.0:
+  * Use more efficient slice size for querying internal secondary
+index tables (CASSANDRA-8550)
   * Fix potentially returning deleted rows with range tombstone 
(CASSANDRA-8558)
 - * Make sure we unmark 

[2/2] cassandra git commit: Merge branch 'cassandra-2.0' into cassandra-2.1

2015-01-14 Thread tylerhobbs
Merge branch 'cassandra-2.0' into cassandra-2.1

Conflicts:
CHANGES.txt


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/7804a53c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/7804a53c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/7804a53c

Branch: refs/heads/cassandra-2.1
Commit: 7804a53c90b24820570cbc5c82043517489abbe5
Parents: 686b810 90780b5
Author: Tyler Hobbs ty...@datastax.com
Authored: Wed Jan 14 11:34:28 2015 -0600
Committer: Tyler Hobbs ty...@datastax.com
Committed: Wed Jan 14 11:34:28 2015 -0600

--
 CHANGES.txt  |  2 ++
 .../db/index/composites/CompositesSearcher.java  | 11 +--
 2 files changed, 7 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/7804a53c/CHANGES.txt
--
diff --cc CHANGES.txt
index 8f312a7,45b2b9c..d2802ad
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,59 -1,8 +1,61 @@@
 -2.0.12:
 +2.1.3
 + * Fix NPE when passing wrong argument in ALTER TABLE statement 
(CASSANDRA-8355)
 + * Pig: Refactor and deprecate CqlStorage (CASSANDRA-8599)
 + * Don't reuse the same cleanup strategy for all sstables (CASSANDRA-8537)
 + * Fix case-sensitivity of index name on CREATE and DROP INDEX
 +   statements (CASSANDRA-8365)
 + * Better detection/logging for corruption in compressed sstables 
(CASSANDRA-8192)
 + * Use the correct repairedAt value when closing writer (CASSANDRA-8570)
 + * (cqlsh) Handle a schema mismatch being detected on startup (CASSANDRA-8512)
 + * Properly calculate expected write size during compaction (CASSANDRA-8532)
 + * Invalidate affected prepared statements when a table's columns
 +   are altered (CASSANDRA-7910)
 + * Stress - user defined writes should populate sequentally (CASSANDRA-8524)
 + * Fix regression in SSTableRewriter causing some rows to become unreadable 
 +   during compaction (CASSANDRA-8429)
 + * Run major compactions for repaired/unrepaired in parallel (CASSANDRA-8510)
 + * (cqlsh) Fix compression options in DESCRIBE TABLE output when compression
 +   is disabled (CASSANDRA-8288)
 + * (cqlsh) Fix DESCRIBE output after keyspaces are altered (CASSANDRA-7623)
 + * Make sure we set lastCompactedKey correctly (CASSANDRA-8463)
 + * (cqlsh) Fix output of CONSISTENCY command (CASSANDRA-8507)
 + * (cqlsh) Fixed the handling of LIST statements (CASSANDRA-8370)
 + * Make sstablescrub check leveled manifest again (CASSANDRA-8432)
 + * Check first/last keys in sstable when giving out positions (CASSANDRA-8458)
 + * Disable mmap on Windows (CASSANDRA-6993)
 + * Add missing ConsistencyLevels to cassandra-stress (CASSANDRA-8253)
 + * Add auth support to cassandra-stress (CASSANDRA-7985)
 + * Fix ArrayIndexOutOfBoundsException when generating error message
 +   for some CQL syntax errors (CASSANDRA-8455)
 + * Scale memtable slab allocation logarithmically (CASSANDRA-7882)
 + * cassandra-stress simultaneous inserts over same seed (CASSANDRA-7964)
 + * Reduce cassandra-stress sampling memory requirements (CASSANDRA-7926)
 + * Ensure memtable flush cannot expire commit log entries from its future 
(CASSANDRA-8383)
 + * Make read defrag async to reclaim memtables (CASSANDRA-8459)
 + * Remove tmplink files for offline compactions (CASSANDRA-8321)
 + * Reduce maxHintsInProgress (CASSANDRA-8415)
 + * BTree updates may call provided update function twice (CASSANDRA-8018)
 + * Release sstable references after anticompaction (CASSANDRA-8386)
 + * Handle abort() in SSTableRewriter properly (CASSANDRA-8320)
 + * Fix high size calculations for prepared statements (CASSANDRA-8231)
 + * Centralize shared executors (CASSANDRA-8055)
 + * Fix filtering for CONTAINS (KEY) relations on frozen collection
 +   clustering columns when the query is restricted to a single
 +   partition (CASSANDRA-8203)
 + * Do more aggressive entire-sstable TTL expiry checks (CASSANDRA-8243)
 + * Add more log info if readMeter is null (CASSANDRA-8238)
 + * add check of the system wall clock time at startup (CASSANDRA-8305)
 + * Support for frozen collections (CASSANDRA-7859)
 + * Fix overflow on histogram computation (CASSANDRA-8028)
 + * Have paxos reuse the timestamp generation of normal queries 
(CASSANDRA-7801)
 + * Fix incremental repair not remove parent session on remote (CASSANDRA-8291)
 + * Improve JBOD disk utilization (CASSANDRA-7386)
 + * Log failed host when preparing incremental repair (CASSANDRA-8228)
 + * Force config client mode in CQLSSTableWriter (CASSANDRA-8281)
 +Merged from 2.0:
+  * Use more efficient slice size for querying internal secondary
+index tables (CASSANDRA-8550)
   * Fix potentially returning deleted rows with range tombstone 
(CASSANDRA-8558)
 - * Make sure we unmark 

[1/2] cassandra git commit: Fix slice size calculation for 2ary index table slices

2015-01-14 Thread tylerhobbs
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.1 686b8104c -> 7804a53c9


Fix slice size calculation for 2ary index table slices

Patch by Tyler Hobbs; reviewed by Benjamin Lerer for CASSANDRA-8550


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/90780b55
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/90780b55
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/90780b55

Branch: refs/heads/cassandra-2.1
Commit: 90780b550f39bc318567ac53f8e8e7d797697f16
Parents: e16d76d
Author: Tyler Hobbs ty...@datastax.com
Authored: Wed Jan 14 11:31:26 2015 -0600
Committer: Tyler Hobbs ty...@datastax.com
Committed: Wed Jan 14 11:32:45 2015 -0600

--
 CHANGES.txt  |  2 ++
 .../db/index/composites/CompositesSearcher.java  | 11 +--
 2 files changed, 7 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/90780b55/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 92bf422..45b2b9c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,6 @@
 2.0.12:
+ * Use more efficient slice size for querying internal secondary
+   index tables (CASSANDRA-8550)
  * Fix potentially returning deleted rows with range tombstone (CASSANDRA-8558)
  * Make sure we unmark compacting after scrub/cleanup etc (CASSANDRA-8548)
  * Check for available disk space before starting a compaction (CASSANDRA-8562)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/90780b55/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
--
diff --git 
a/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java 
b/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
index b978021..9370133 100644
--- a/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
+++ b/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
@@ -107,9 +107,8 @@ public class CompositesSearcher extends 
SecondaryIndexSearcher
 private int limit = filter.currentLimit();
 private int columnsCount = 0;
 
-private int meanColumns = 
Math.max(index.getIndexCfs().getMeanColumns(), 1);
-// We shouldn't fetch only 1 row as this provides buggy paging in 
case the first row doesn't satisfy all clauses
-private int rowsPerQuery = Math.max(Math.min(filter.maxRows(), 
filter.maxColumns() / meanColumns), 2);
+// We have to fetch at least two rows to avoid breaking paging if 
the first row doesn't satisfy all clauses
+private int indexCellsPerQuery = Math.max(2, 
Math.min(filter.maxColumns(), filter.maxRows()));
 
 public boolean needsFiltering()
 {
@@ -144,9 +143,9 @@ public class CompositesSearcher extends 
SecondaryIndexSearcher
 
 if (indexColumns == null || indexColumns.isEmpty())
 {
-if (columnsRead < rowsPerQuery)
+if (columnsRead < indexCellsPerQuery)
 {
-logger.trace("Read only {} (< {}) last page 
through, must be done", columnsRead, rowsPerQuery);
+logger.trace("Read only {} (< {}) last page 
through, must be done", columnsRead, indexCellsPerQuery);
 return makeReturn(currentKey, data);
 }
 
@@ -159,7 +158,7 @@ public class CompositesSearcher extends 
SecondaryIndexSearcher
  
lastSeenPrefix,
  
endPrefix,
  
false,
- 
rowsPerQuery,
+ 
indexCellsPerQuery,
  
filter.timestamp);
 ColumnFamily indexRow = 
index.getIndexCfs().getColumnFamily(indexFilter);
 if (indexRow == null || indexRow.getColumnCount() == 0)



[3/3] cassandra git commit: Merge branch 'cassandra-2.1' into trunk

2015-01-14 Thread tylerhobbs
Merge branch 'cassandra-2.1' into trunk


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/88b51539
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/88b51539
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/88b51539

Branch: refs/heads/trunk
Commit: 88b51539fb148124b76679eb9bd4b24543a9d5bb
Parents: 4937451 7804a53
Author: Tyler Hobbs ty...@datastax.com
Authored: Wed Jan 14 11:34:53 2015 -0600
Committer: Tyler Hobbs ty...@datastax.com
Committed: Wed Jan 14 11:34:53 2015 -0600

--
 CHANGES.txt  |  2 ++
 .../db/index/composites/CompositesSearcher.java  | 11 +--
 2 files changed, 7 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/88b51539/CHANGES.txt
--

http://git-wip-us.apache.org/repos/asf/cassandra/blob/88b51539/src/java/org/apache/cassandra/db/index/composites/CompositesSearcher.java
--



[jira] [Commented] (CASSANDRA-8535) java.lang.RuntimeException: Failed to rename XXX to YYY

2015-01-14 Thread Joshua McKenzie (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-8535?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14277358#comment-14277358
 ] 

Joshua McKenzie commented on CASSANDRA-8535:


Looks like the problem is in the logic for SSTableRewriter on Windows.  During 
finishAndMaybeThrow, we switchWriter(...) which closes our SSTableWriter (ifile 
and dfile) and opens a reader to the file.  Shortly after, we attempt to close 
the SSTableWriter again, this time with the finishType to attempt a rename.  
Problem is, we now have an SSTR with that ifile and dfile open on tmpfiles so 
the rename fails.

This doesn't happen on trunk as we're using FileChannels and nio in 
RandomAccessReader instead of RandomAccessFile, meaning we can safely rename 
files with open handles.  I'll see if I can't get a Windows-specific workaround 
for SSTableRewriter in for this ticket for 2.1; isolating the fix to windows 
means I'm comfortable with it being on the 2.1 branch and we'll need it on 3.0 
for mmap support anyway.

 java.lang.RuntimeException: Failed to rename XXX to YYY
 ---

 Key: CASSANDRA-8535
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8535
 Project: Cassandra
  Issue Type: Bug
 Environment: Windows 2008 X64
Reporter: Leonid Shalupov
Assignee: Joshua McKenzie
 Fix For: 2.1.3


 {code}
 java.lang.RuntimeException: Failed to rename 
 build\test\cassandra\data;0\system\schema_keyspaces-b0f2235744583cdb9631c43e59ce3676\system-schema_keyspaces-tmp-ka-5-Index.db
  to 
 build\test\cassandra\data;0\system\schema_keyspaces-b0f2235744583cdb9631c43e59ce3676\system-schema_keyspaces-ka-5-Index.db
   at 
 org.apache.cassandra.io.util.FileUtils.renameWithConfirm(FileUtils.java:170) 
 ~[main/:na]
   at 
 org.apache.cassandra.io.util.FileUtils.renameWithConfirm(FileUtils.java:154) 
 ~[main/:na]
   at 
 org.apache.cassandra.io.sstable.SSTableWriter.rename(SSTableWriter.java:569) 
 ~[main/:na]
   at 
 org.apache.cassandra.io.sstable.SSTableWriter.rename(SSTableWriter.java:561) 
 ~[main/:na]
   at 
 org.apache.cassandra.io.sstable.SSTableWriter.close(SSTableWriter.java:535) 
 ~[main/:na]
   at 
 org.apache.cassandra.io.sstable.SSTableWriter.finish(SSTableWriter.java:470) 
 ~[main/:na]
   at 
 org.apache.cassandra.io.sstable.SSTableRewriter.finishAndMaybeThrow(SSTableRewriter.java:349)
  ~[main/:na]
   at 
 org.apache.cassandra.io.sstable.SSTableRewriter.finish(SSTableRewriter.java:324)
  ~[main/:na]
   at 
 org.apache.cassandra.io.sstable.SSTableRewriter.finish(SSTableRewriter.java:304)
  ~[main/:na]
   at 
 org.apache.cassandra.db.compaction.CompactionTask.runMayThrow(CompactionTask.java:200)
  ~[main/:na]
   at 
 org.apache.cassandra.utils.WrappedRunnable.run(WrappedRunnable.java:28) 
 ~[main/:na]
   at 
 org.apache.cassandra.db.compaction.CompactionTask.executeInternal(CompactionTask.java:75)
  ~[main/:na]
   at 
 org.apache.cassandra.db.compaction.AbstractCompactionTask.execute(AbstractCompactionTask.java:59)
  ~[main/:na]
   at 
 org.apache.cassandra.db.compaction.CompactionManager$BackgroundCompactionTask.run(CompactionManager.java:226)
  ~[main/:na]
   at 
 java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) 
 ~[na:1.7.0_45]
   at java.util.concurrent.FutureTask.run(FutureTask.java:262) 
 ~[na:1.7.0_45]
   at 
 java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
  ~[na:1.7.0_45]
   at 
 java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
  [na:1.7.0_45]
   at java.lang.Thread.run(Thread.java:744) [na:1.7.0_45]
 Caused by: java.nio.file.FileSystemException: 
 build\test\cassandra\data;0\system\schema_keyspaces-b0f2235744583cdb9631c43e59ce3676\system-schema_keyspaces-tmp-ka-5-Index.db
  -> 
 build\test\cassandra\data;0\system\schema_keyspaces-b0f2235744583cdb9631c43e59ce3676\system-schema_keyspaces-ka-5-Index.db:
  The process cannot access the file because it is being used by another 
 process.
   at 
 sun.nio.fs.WindowsException.translateToIOException(WindowsException.java:86) 
 ~[na:1.7.0_45]
   at 
 sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:97) 
 ~[na:1.7.0_45]
   at sun.nio.fs.WindowsFileCopy.move(WindowsFileCopy.java:301) 
 ~[na:1.7.0_45]
   at 
 sun.nio.fs.WindowsFileSystemProvider.move(WindowsFileSystemProvider.java:287) 
 ~[na:1.7.0_45]
   at java.nio.file.Files.move(Files.java:1345) ~[na:1.7.0_45]
   at 
 org.apache.cassandra.io.util.FileUtils.atomicMoveWithFallback(FileUtils.java:184)
  ~[main/:na]
   at 
 org.apache.cassandra.io.util.FileUtils.renameWithConfirm(FileUtils.java:166) 
 ~[main/:na]
   ... 18 common frames omitted
 {code}



--
This message was sent by Atlassian 

[jira] [Resolved] (CASSANDRA-8551) Windows - Failure to rename file during compaction - unit test only

2015-01-14 Thread Joshua McKenzie (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8551?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Joshua McKenzie resolved CASSANDRA-8551.

Resolution: Fixed

 Windows - Failure to rename file during compaction - unit test only
 ---

 Key: CASSANDRA-8551
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8551
 Project: Cassandra
  Issue Type: Bug
Reporter: Joshua McKenzie
Assignee: Joshua McKenzie
Priority: Minor
  Labels: Windows
 Fix For: 2.1.3


 This occurs on both 2.1 and trunk on Windows.
 The error is as follows:
 {noformat}
 ERROR 21:42:48 Fatal exception in thread Thread[CompactionExecutor:3,1,main]
 java.lang.RuntimeException: Failed to rename 
 build\test\cassandra\data;0\system\schema_columns-296e9c049bec3085827dc17d3df2122a\system-schema_columns-tmp-ka-10-Index.db
  to 
 build\test\cassandra\data;0\system\schema_columns-296e9c049bec3085827dc17d3df2122a\system-schema_columns-ka-10-Index.db
 at 
 org.apache.cassandra.io.util.FileUtils.renameWithConfirm(FileUtils.java:170) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.util.FileUtils.renameWithConfirm(FileUtils.java:154) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableWriter.rename(SSTableWriter.java:569) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableWriter.rename(SSTableWriter.java:561) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableWriter.close(SSTableWriter.java:535) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableWriter.finish(SSTableWriter.java:470) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableRewriter.finishAndMaybeThrow(SSTableRewriter.java:349)
  ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableRewriter.finish(SSTableRewriter.java:324)
  ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableRewriter.finish(SSTableRewriter.java:304)
  ~[main/:na]
 at 
 org.apache.cassandra.db.compaction.CompactionTask.runMayThrow(CompactionTask.java:201)
  ~[main/:na]
 at org.apache.cassandra.utils.WrappedRunnable.run(WrappedRunnable.java:28) 
 ~[main/:na]
 at 
 org.apache.cassandra.db.compaction.CompactionTask.executeInternal(CompactionTask.java:75)
  ~[main/:na]
 at 
 org.apache.cassandra.db.compaction.AbstractCompactionTask.execute(AbstractCompactionTask.java:59)
  ~[main/:na]
 at 
 org.apache.cassandra.db.compaction.CompactionManager$BackgroundCompactionTask.run(CompactionManager.java:226)
  ~[main/:na]
 at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) 
 ~[na:1.7.0_71]
 at java.util.concurrent.FutureTask.run(FutureTask.java:262) ~[na:1.7.0_71]
 at 
 java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
  ~[na:1.7.0_71]
 at 
 java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
  [na:1.7.0_71]
 at java.lang.Thread.run(Thread.java:745) [na:1.7.0_71]
 Caused by: java.nio.file.FileSystemException: 
 build\test\cassandra\data;0\system\schema_columns-296e9c049bec3085827dc17d3df2122a\system-schema_columns-tmp-ka-10-Index.db
  -> 
 build\test\cassandra\data;0\system\schema_columns-296e9c049bec3085827dc17d3df2122a\system-schema_columns-ka-10-Index.db:
  The process cannot access the file because it is being used by another 
 process.
 {noformat}
 The rename operation from -tmp to final sstable name fails on Windows as 
 something still has a handle open to the file.  This occurs during unit tests 
 only so marking as minor, but it's incredibly noisy so it's best to fix it.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Reopened] (CASSANDRA-8551) Windows - Failure to rename file during compaction - unit test only

2015-01-14 Thread Joshua McKenzie (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8551?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Joshua McKenzie reopened CASSANDRA-8551:


Need to swap resolution to duplicate.

 Windows - Failure to rename file during compaction - unit test only
 ---

 Key: CASSANDRA-8551
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8551
 Project: Cassandra
  Issue Type: Bug
Reporter: Joshua McKenzie
Assignee: Joshua McKenzie
Priority: Minor
  Labels: Windows
 Fix For: 2.1.3


 This occurs on both 2.1 and trunk on Windows.
 The error is as follows:
 {noformat}
 ERROR 21:42:48 Fatal exception in thread Thread[CompactionExecutor:3,1,main]
 java.lang.RuntimeException: Failed to rename 
 build\test\cassandra\data;0\system\schema_columns-296e9c049bec3085827dc17d3df2122a\system-schema_columns-tmp-ka-10-Index.db
  to 
 build\test\cassandra\data;0\system\schema_columns-296e9c049bec3085827dc17d3df2122a\system-schema_columns-ka-10-Index.db
 at 
 org.apache.cassandra.io.util.FileUtils.renameWithConfirm(FileUtils.java:170) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.util.FileUtils.renameWithConfirm(FileUtils.java:154) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableWriter.rename(SSTableWriter.java:569) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableWriter.rename(SSTableWriter.java:561) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableWriter.close(SSTableWriter.java:535) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableWriter.finish(SSTableWriter.java:470) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableRewriter.finishAndMaybeThrow(SSTableRewriter.java:349)
  ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableRewriter.finish(SSTableRewriter.java:324)
  ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableRewriter.finish(SSTableRewriter.java:304)
  ~[main/:na]
 at 
 org.apache.cassandra.db.compaction.CompactionTask.runMayThrow(CompactionTask.java:201)
  ~[main/:na]
 at org.apache.cassandra.utils.WrappedRunnable.run(WrappedRunnable.java:28) 
 ~[main/:na]
 at 
 org.apache.cassandra.db.compaction.CompactionTask.executeInternal(CompactionTask.java:75)
  ~[main/:na]
 at 
 org.apache.cassandra.db.compaction.AbstractCompactionTask.execute(AbstractCompactionTask.java:59)
  ~[main/:na]
 at 
 org.apache.cassandra.db.compaction.CompactionManager$BackgroundCompactionTask.run(CompactionManager.java:226)
  ~[main/:na]
 at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) 
 ~[na:1.7.0_71]
 at java.util.concurrent.FutureTask.run(FutureTask.java:262) ~[na:1.7.0_71]
 at 
 java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
  ~[na:1.7.0_71]
 at 
 java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
  [na:1.7.0_71]
 at java.lang.Thread.run(Thread.java:745) [na:1.7.0_71]
 Caused by: java.nio.file.FileSystemException: 
 build\test\cassandra\data;0\system\schema_columns-296e9c049bec3085827dc17d3df2122a\system-schema_columns-tmp-ka-10-Index.db
  -> 
 build\test\cassandra\data;0\system\schema_columns-296e9c049bec3085827dc17d3df2122a\system-schema_columns-ka-10-Index.db:
  The process cannot access the file because it is being used by another 
 process.
 {noformat}
 The rename operation from -tmp to final sstable name fails on Windows as 
 something still has a handle open to the file.  This occurs during unit tests 
 only so marking as minor, but it's incredibly noisy so it's best to fix it.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Resolved] (CASSANDRA-8551) Windows - Failure to rename file during compaction - unit test only

2015-01-14 Thread Joshua McKenzie (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8551?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Joshua McKenzie resolved CASSANDRA-8551.

Resolution: Duplicate

 Windows - Failure to rename file during compaction - unit test only
 ---

 Key: CASSANDRA-8551
 URL: https://issues.apache.org/jira/browse/CASSANDRA-8551
 Project: Cassandra
  Issue Type: Bug
Reporter: Joshua McKenzie
Assignee: Joshua McKenzie
Priority: Minor
  Labels: Windows
 Fix For: 2.1.3


 This occurs on both 2.1 and trunk on Windows.
 The error is as follows:
 {noformat}
 ERROR 21:42:48 Fatal exception in thread Thread[CompactionExecutor:3,1,main]
 java.lang.RuntimeException: Failed to rename 
 build\test\cassandra\data;0\system\schema_columns-296e9c049bec3085827dc17d3df2122a\system-schema_columns-tmp-ka-10-Index.db
  to 
 build\test\cassandra\data;0\system\schema_columns-296e9c049bec3085827dc17d3df2122a\system-schema_columns-ka-10-Index.db
 at 
 org.apache.cassandra.io.util.FileUtils.renameWithConfirm(FileUtils.java:170) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.util.FileUtils.renameWithConfirm(FileUtils.java:154) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableWriter.rename(SSTableWriter.java:569) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableWriter.rename(SSTableWriter.java:561) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableWriter.close(SSTableWriter.java:535) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableWriter.finish(SSTableWriter.java:470) 
 ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableRewriter.finishAndMaybeThrow(SSTableRewriter.java:349)
  ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableRewriter.finish(SSTableRewriter.java:324)
  ~[main/:na]
 at 
 org.apache.cassandra.io.sstable.SSTableRewriter.finish(SSTableRewriter.java:304)
  ~[main/:na]
 at 
 org.apache.cassandra.db.compaction.CompactionTask.runMayThrow(CompactionTask.java:201)
  ~[main/:na]
 at org.apache.cassandra.utils.WrappedRunnable.run(WrappedRunnable.java:28) 
 ~[main/:na]
 at 
 org.apache.cassandra.db.compaction.CompactionTask.executeInternal(CompactionTask.java:75)
  ~[main/:na]
 at 
 org.apache.cassandra.db.compaction.AbstractCompactionTask.execute(AbstractCompactionTask.java:59)
  ~[main/:na]
 at 
 org.apache.cassandra.db.compaction.CompactionManager$BackgroundCompactionTask.run(CompactionManager.java:226)
  ~[main/:na]
 at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) 
 ~[na:1.7.0_71]
 at java.util.concurrent.FutureTask.run(FutureTask.java:262) ~[na:1.7.0_71]
 at 
 java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
  ~[na:1.7.0_71]
 at 
 java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
  [na:1.7.0_71]
 at java.lang.Thread.run(Thread.java:745) [na:1.7.0_71]
 Caused by: java.nio.file.FileSystemException: 
 build\test\cassandra\data;0\system\schema_columns-296e9c049bec3085827dc17d3df2122a\system-schema_columns-tmp-ka-10-Index.db
  -> 
 build\test\cassandra\data;0\system\schema_columns-296e9c049bec3085827dc17d3df2122a\system-schema_columns-ka-10-Index.db:
  The process cannot access the file because it is being used by another 
 process.
 {noformat}
 The rename operation from -tmp to final sstable name fails on Windows as 
 something still has a handle open to the file.  This occurs during unit tests 
 only so marking as minor, but it's incredibly noisy so it's best to fix it.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (CASSANDRA-7409) Allow multiple overlapping sstables in L1

2015-01-14 Thread Carl Yeksigian (JIRA)

[ 
https://issues.apache.org/jira/browse/CASSANDRA-7409?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14277372#comment-14277372
 ] 

Carl Yeksigian commented on CASSANDRA-7409:
---

I've pushed up an updated branch at: 
https://github.com/carlyeks/cassandra/tree/overlapping-better-selection

The compaction selection has been updated so that it performs much better now. 
The biggest issue was selecting only a single sstable for the overlapping 
compactions, instead of that one plus the sstables overlapping it. With that 
fixed, it performs much better now.

|| || Operation Time || Compaction Time ||
| MOLO=0 | 3:52:46 | 0:21:04 |
| MOLO=2 | 3:45:52 | 0:37:50 | 
| MOLO=5 | 3:42:59 | 0:03:17 |
| LCS w/ STCS | 3:48:14 | 0:50:24 |
| LCS w/o STCS | 3:50:38 | 1:05:02 |

The performance on spinning disk is also improved by allowing overlapping; here 
are the results of a read operation after running a large mixed read/write 
workload: 
http://cstar.datastax.com/graph?stats=e113f706-9b54-11e4-9f2c-42010af0688f&metric=op_rate&operation=2_user&smoothing=1&show_aggregates=true&xmin=0&xmax=121.88&ymin=0&ymax=113984.2

 Allow multiple overlapping sstables in L1
 -

 Key: CASSANDRA-7409
 URL: https://issues.apache.org/jira/browse/CASSANDRA-7409
 Project: Cassandra
  Issue Type: Improvement
Reporter: Carl Yeksigian
Assignee: Carl Yeksigian
  Labels: compaction
 Fix For: 3.0


 Currently, when a normal L0 compaction takes place (not STCS), we take up to 
 MAX_COMPACTING_L0 L0 sstables and all of the overlapping L1 sstables and 
 compact them together. If we didn't have to deal with the overlapping L1 
 tables, we could compact a higher number of L0 sstables together into a set 
 of non-overlapping L1 sstables.
 This could be done by delaying the invariant that L1 has no overlapping 
 sstables. Going from L1 to L2, we would be compacting fewer sstables together 
 which overlap.
 When reading, we will not have the same one sstable per level (except L0) 
 guarantee, but this can be bounded (once we have too many sets of sstables, 
 either compact them back into the same level, or compact them up to the next 
 level).
 This could be generalized to allow any level to be the maximum for this 
 overlapping strategy.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Updated] (CASSANDRA-8366) Repair grows data on nodes, causes load to become unbalanced

2015-01-14 Thread Jan Karlsson (JIRA)

 [ 
https://issues.apache.org/jira/browse/CASSANDRA-8366?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Jan Karlsson updated CASSANDRA-8366:

Description: 
There seems to be something weird going on when repairing data.

I have a program that runs 2 hours which inserts 250 random numbers and reads 
250 times per second. It creates 2 keyspaces with SimpleStrategy and RF of 3. 

I use size-tiered compaction for my cluster. 

After those 2 hours I run a repair and the load of all nodes goes up. If I run 
incremental repair the load goes up a lot more. I saw the load shoot up 8 times 
the original size multiple times with incremental repair. (from 2G to 16G)


with node 9 8 7 and 6 the repro procedure looked like this:
(Note that running full repair first is not a requirement to reproduce.)
{noformat}
After 2 hours of 250 reads + 250 writes per second:
UN  9  583.39 MB  256 ?   28220962-26ae-4eeb-8027-99f96e377406  rack1
UN  8  584.01 MB  256 ?   f2de6ea1-de88-4056-8fde-42f9c476a090  rack1
UN  7  583.72 MB  256 ?   2b6b5d66-13c8-43d8-855c-290c0f3c3a0b  rack1
UN  6  583.84 MB  256 ?   b8bd67f1-a816-46ff-b4a4-136ad5af6d4b  rack1

Repair -pr -par on all nodes sequentially
UN  9  746.29 MB  256 ?   28220962-26ae-4eeb-8027-99f96e377406  rack1
UN  8  751.02 MB  256 ?   f2de6ea1-de88-4056-8fde-42f9c476a090  rack1
UN  7  748.89 MB  256 ?   2b6b5d66-13c8-43d8-855c-290c0f3c3a0b  rack1
UN  6  758.34 MB  256 ?   b8bd67f1-a816-46ff-b4a4-136ad5af6d4b  rack1

repair -inc -par on all nodes sequentially
UN  9  2.41 GB256 ?   28220962-26ae-4eeb-8027-99f96e377406  rack1
UN  8  2.53 GB256 ?   f2de6ea1-de88-4056-8fde-42f9c476a090  rack1
UN  7  2.6 GB 256 ?   2b6b5d66-13c8-43d8-855c-290c0f3c3a0b  rack1
UN  6  2.17 GB256 ?   b8bd67f1-a816-46ff-b4a4-136ad5af6d4b  rack1

after rolling restart
UN  9  1.47 GB256 ?   28220962-26ae-4eeb-8027-99f96e377406  rack1
UN  8  1.5 GB 256 ?   f2de6ea1-de88-4056-8fde-42f9c476a090  rack1
UN  7  2.46 GB256 ?   2b6b5d66-13c8-43d8-855c-290c0f3c3a0b  rack1
UN  6  1.19 GB256 ?   b8bd67f1-a816-46ff-b4a4-136ad5af6d4b  rack1

compact all nodes sequentially
UN  9  989.99 MB  256 ?   28220962-26ae-4eeb-8027-99f96e377406  rack1
UN  8  994.75 MB  256 ?   f2de6ea1-de88-4056-8fde-42f9c476a090  rack1
UN  7  1.46 GB256 ?   2b6b5d66-13c8-43d8-855c-290c0f3c3a0b  rack1
UN  6  758.82 MB  256 ?   b8bd67f1-a816-46ff-b4a4-136ad5af6d4b  rack1

repair -inc -par on all nodes sequentially
UN  9  1.98 GB256 ?   28220962-26ae-4eeb-8027-99f96e377406  rack1
UN  8  2.3 GB 256 ?   f2de6ea1-de88-4056-8fde-42f9c476a090  rack1
UN  7  3.71 GB256 ?   2b6b5d66-13c8-43d8-855c-290c0f3c3a0b  rack1
UN  6  1.68 GB256 ?   b8bd67f1-a816-46ff-b4a4-136ad5af6d4b  rack1

restart once more
UN  9  2 GB   256 ?   28220962-26ae-4eeb-8027-99f96e377406  rack1
UN  8  2.05 GB256 ?   f2de6ea1-de88-4056-8fde-42f9c476a090  rack1
UN  7  4.1 GB 256 ?   2b6b5d66-13c8-43d8-855c-290c0f3c3a0b  rack1
UN  6  1.68 GB256 ?   b8bd67f1-a816-46ff-b4a4-136ad5af6d4b  rack1
{noformat}

Is there something I'm missing, or is this strange behavior?

  was:
There seems to be something weird going on when repairing data.

I have a program that runs 2 hours which inserts 250 random numbers and reads 
250 times per second. It creates 2 keyspaces with SimpleStrategy and RF of 3. 

I use size-tiered compaction for my cluster. 

After those 2 hours I run a repair and the load of all nodes goes up. If I run 
incremental repair the load goes up a lot more. I saw the load shoot up 8 times 
the original size multiple times with incremental repair. (from 2G to 16G)


with node 9 8 7 and 6 the repro procedure looked like this:
(Note that running full repair first is not a requirement to reproduce.)

After 2 hours of 250 reads + 250 writes per second:
UN  9  583.39 MB  256 ?   28220962-26ae-4eeb-8027-99f96e377406  rack1
UN  8  584.01 MB  256 ?   f2de6ea1-de88-4056-8fde-42f9c476a090  rack1
UN  7  583.72 MB  256 ?   2b6b5d66-13c8-43d8-855c-290c0f3c3a0b  rack1
UN  6  583.84 MB  256 ?   b8bd67f1-a816-46ff-b4a4-136ad5af6d4b  rack1

Repair -pr -par on all nodes sequentially
UN  9  746.29 MB  256 ?   28220962-26ae-4eeb-8027-99f96e377406  rack1
UN  8  751.02 MB  256 ?   f2de6ea1-de88-4056-8fde-42f9c476a090  rack1
UN  7  748.89 MB  256 ?   2b6b5d66-13c8-43d8-855c-290c0f3c3a0b  rack1
UN  6  758.34 MB  256 ?   b8bd67f1-a816-46ff-b4a4-136ad5af6d4b  rack1

repair -inc -par on all nodes sequentially
UN  9  2.41 GB256 ?   28220962-26ae-4eeb-8027-99f96e377406  rack1
UN  8  2.53 GB256 ?   f2de6ea1-de88-4056-8fde-42f9c476a090  rack1
UN  7  2.6 GB 256 ?   

cassandra git commit: Make sure we unmark compacting after scrub/cleanup/upgradesstables

2015-01-14 Thread marcuse
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.0 9e5a4fad7 - c5ccdb766


Make sure we unmark compacting after scrub/cleanup/upgradesstables

Patch by marcuse; reviewed by yukim for CASSANDRA-8548


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/c5ccdb76
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/c5ccdb76
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/c5ccdb76

Branch: refs/heads/cassandra-2.0
Commit: c5ccdb7665bf16fc18f336e3967e15ac9266832d
Parents: 9e5a4fa
Author: Marcus Eriksson marc...@apache.org
Authored: Thu Jan 8 15:43:31 2015 +0100
Committer: Marcus Eriksson marc...@apache.org
Committed: Wed Jan 14 09:49:36 2015 +0100

--
 CHANGES.txt   |  1 +
 .../apache/cassandra/db/compaction/CompactionManager.java | 10 --
 2 files changed, 9 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/c5ccdb76/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 9b20a06..a711790 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.0.12:
+ * Make sure we unmark compacting after scrub/cleanup etc (CASSANDRA-8548)
  * Check for available disk space before starting a compaction (CASSANDRA-8562)
  * Fix DISTINCT queries with LIMITs or paging when some partitions
contain only tombstones (CASSANDRA-8490)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c5ccdb76/src/java/org/apache/cassandra/db/compaction/CompactionManager.java
--
diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java 
b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java
index 19dedb0..62599e3 100644
--- a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java
+++ b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java
@@ -220,8 +220,14 @@ public class CompactionManager implements 
CompactionManagerMBean
 {
 public Object call() throws IOException
 {
-operation.perform(cfs, sstables);
-cfs.getDataTracker().unmarkCompacting(sstables);
+try
+{
+operation.perform(cfs, sstables);
+}
+finally
+{
+cfs.getDataTracker().unmarkCompacting(sstables);
+}
 return this;
 }
 };



[2/3] cassandra git commit: Merge branch 'cassandra-2.0' into cassandra-2.1

2015-01-14 Thread marcuse
Merge branch 'cassandra-2.0' into cassandra-2.1


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/739f9ce3
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/739f9ce3
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/739f9ce3

Branch: refs/heads/trunk
Commit: 739f9ce380917be5c1a839d132903479c4c7aa0c
Parents: 7e6d9eb c5ccdb7
Author: Marcus Eriksson marc...@apache.org
Authored: Wed Jan 14 09:50:12 2015 +0100
Committer: Marcus Eriksson marc...@apache.org
Committed: Wed Jan 14 09:50:12 2015 +0100

--

--




[1/2] cassandra git commit: Make sure we unmark compacting after scrub/cleanup/upgradesstables

2015-01-14 Thread marcuse
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.1 7e6d9eb84 - 739f9ce38


Make sure we unmark compacting after scrub/cleanup/upgradesstables

Patch by marcuse; reviewed by yukim for CASSANDRA-8548


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/c5ccdb76
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/c5ccdb76
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/c5ccdb76

Branch: refs/heads/cassandra-2.1
Commit: c5ccdb7665bf16fc18f336e3967e15ac9266832d
Parents: 9e5a4fa
Author: Marcus Eriksson marc...@apache.org
Authored: Thu Jan 8 15:43:31 2015 +0100
Committer: Marcus Eriksson marc...@apache.org
Committed: Wed Jan 14 09:49:36 2015 +0100

--
 CHANGES.txt   |  1 +
 .../apache/cassandra/db/compaction/CompactionManager.java | 10 --
 2 files changed, 9 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/c5ccdb76/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 9b20a06..a711790 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.0.12:
+ * Make sure we unmark compacting after scrub/cleanup etc (CASSANDRA-8548)
  * Check for available disk space before starting a compaction (CASSANDRA-8562)
  * Fix DISTINCT queries with LIMITs or paging when some partitions
contain only tombstones (CASSANDRA-8490)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c5ccdb76/src/java/org/apache/cassandra/db/compaction/CompactionManager.java
--
diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java 
b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java
index 19dedb0..62599e3 100644
--- a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java
+++ b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java
@@ -220,8 +220,14 @@ public class CompactionManager implements 
CompactionManagerMBean
 {
 public Object call() throws IOException
 {
-operation.perform(cfs, sstables);
-cfs.getDataTracker().unmarkCompacting(sstables);
+try
+{
+operation.perform(cfs, sstables);
+}
+finally
+{
+cfs.getDataTracker().unmarkCompacting(sstables);
+}
 return this;
 }
 };



[2/2] cassandra git commit: Merge branch 'cassandra-2.0' into cassandra-2.1

2015-01-14 Thread marcuse
Merge branch 'cassandra-2.0' into cassandra-2.1


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/739f9ce3
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/739f9ce3
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/739f9ce3

Branch: refs/heads/cassandra-2.1
Commit: 739f9ce380917be5c1a839d132903479c4c7aa0c
Parents: 7e6d9eb c5ccdb7
Author: Marcus Eriksson marc...@apache.org
Authored: Wed Jan 14 09:50:12 2015 +0100
Committer: Marcus Eriksson marc...@apache.org
Committed: Wed Jan 14 09:50:12 2015 +0100

--

--




[1/3] cassandra git commit: Make sure we unmark compacting after scrub/cleanup/upgradesstables

2015-01-14 Thread marcuse
Repository: cassandra
Updated Branches:
  refs/heads/trunk b8dd3bd75 - f01b3194e


Make sure we unmark compacting after scrub/cleanup/upgradesstables

Patch by marcuse; reviewed by yukim for CASSANDRA-8548


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/c5ccdb76
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/c5ccdb76
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/c5ccdb76

Branch: refs/heads/trunk
Commit: c5ccdb7665bf16fc18f336e3967e15ac9266832d
Parents: 9e5a4fa
Author: Marcus Eriksson marc...@apache.org
Authored: Thu Jan 8 15:43:31 2015 +0100
Committer: Marcus Eriksson marc...@apache.org
Committed: Wed Jan 14 09:49:36 2015 +0100

--
 CHANGES.txt   |  1 +
 .../apache/cassandra/db/compaction/CompactionManager.java | 10 --
 2 files changed, 9 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/c5ccdb76/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 9b20a06..a711790 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.0.12:
+ * Make sure we unmark compacting after scrub/cleanup etc (CASSANDRA-8548)
  * Check for available disk space before starting a compaction (CASSANDRA-8562)
  * Fix DISTINCT queries with LIMITs or paging when some partitions
contain only tombstones (CASSANDRA-8490)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c5ccdb76/src/java/org/apache/cassandra/db/compaction/CompactionManager.java
--
diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java 
b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java
index 19dedb0..62599e3 100644
--- a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java
+++ b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java
@@ -220,8 +220,14 @@ public class CompactionManager implements 
CompactionManagerMBean
 {
 public Object call() throws IOException
 {
-operation.perform(cfs, sstables);
-cfs.getDataTracker().unmarkCompacting(sstables);
+try
+{
+operation.perform(cfs, sstables);
+}
+finally
+{
+cfs.getDataTracker().unmarkCompacting(sstables);
+}
 return this;
 }
 };



[3/3] cassandra git commit: Merge branch 'cassandra-2.1' into trunk

2015-01-14 Thread marcuse
Merge branch 'cassandra-2.1' into trunk


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/f01b3194
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/f01b3194
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/f01b3194

Branch: refs/heads/trunk
Commit: f01b3194e815cb833e76fc878288a41b016b2d36
Parents: b8dd3bd 739f9ce
Author: Marcus Eriksson marc...@apache.org
Authored: Wed Jan 14 09:50:22 2015 +0100
Committer: Marcus Eriksson marc...@apache.org
Committed: Wed Jan 14 09:50:22 2015 +0100

--

--




cassandra git commit: Minor clarification/simplification in IndexedSliceReader

2015-01-14 Thread slebresne
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.0 2424dd133 - 92f753957


Minor clarification/simplification in IndexedSliceReader


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/92f75395
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/92f75395
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/92f75395

Branch: refs/heads/cassandra-2.0
Commit: 92f753957325413e2aba67f36b4305fb0316b501
Parents: 2424dd1
Author: Sylvain Lebresne sylv...@datastax.com
Authored: Wed Jan 14 14:11:41 2015 +0100
Committer: Sylvain Lebresne sylv...@datastax.com
Committed: Wed Jan 14 14:11:41 2015 +0100

--
 .../apache/cassandra/db/columniterator/IndexedSliceReader.java  | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/92f75395/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
--
diff --git 
a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java 
b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
index ac48fc0..4409484 100644
--- a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
+++ b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
@@ -333,7 +333,6 @@ class IndexedSliceReader extends 
AbstractIteratorOnDiskAtom implements OnDiskA
 // can be mistakenly added this way.
 if (reversed  !prefetched.isEmpty())
 {
-boolean gotSome = false;
 // Avoids some comparison when we know it's not useful
 boolean inSlice = false;
 
@@ -349,7 +348,6 @@ class IndexedSliceReader extends 
AbstractIteratorOnDiskAtom implements OnDiskA
 if (prefetchedCol instanceof RangeTombstone)
 {
 blockColumns.addLast(prefetched.poll());
-gotSome = true;
 continue;
 }
 
@@ -367,7 +365,6 @@ class IndexedSliceReader extends 
AbstractIteratorOnDiskAtom implements OnDiskA
 else if (inSlice || 
isColumnBeforeSliceFinish(prefetchedCol))
 {
 blockColumns.addLast(prefetched.poll());
-gotSome = true;
 inSlice = true;
 }
 // if col is after slice, ignore
@@ -376,7 +373,7 @@ class IndexedSliceReader extends 
AbstractIteratorOnDiskAtom implements OnDiskA
 prefetched.poll();
 }
 }
-if (gotSome)
+if (!blockColumns.isEmpty())
 return true;
 }
 try



[1/2] cassandra git commit: Minor clarification/simplification in IndexedSliceReader

2015-01-14 Thread slebresne
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.1 0a17c0fd7 - cf07fc259


Minor clarification/simplification in IndexedSliceReader


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/92f75395
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/92f75395
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/92f75395

Branch: refs/heads/cassandra-2.1
Commit: 92f753957325413e2aba67f36b4305fb0316b501
Parents: 2424dd1
Author: Sylvain Lebresne sylv...@datastax.com
Authored: Wed Jan 14 14:11:41 2015 +0100
Committer: Sylvain Lebresne sylv...@datastax.com
Committed: Wed Jan 14 14:11:41 2015 +0100

--
 .../apache/cassandra/db/columniterator/IndexedSliceReader.java  | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/92f75395/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
--
diff --git 
a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java 
b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
index ac48fc0..4409484 100644
--- a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
+++ b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
@@ -333,7 +333,6 @@ class IndexedSliceReader extends 
AbstractIteratorOnDiskAtom implements OnDiskA
 // can be mistakenly added this way.
 if (reversed  !prefetched.isEmpty())
 {
-boolean gotSome = false;
 // Avoids some comparison when we know it's not useful
 boolean inSlice = false;
 
@@ -349,7 +348,6 @@ class IndexedSliceReader extends 
AbstractIteratorOnDiskAtom implements OnDiskA
 if (prefetchedCol instanceof RangeTombstone)
 {
 blockColumns.addLast(prefetched.poll());
-gotSome = true;
 continue;
 }
 
@@ -367,7 +365,6 @@ class IndexedSliceReader extends 
AbstractIteratorOnDiskAtom implements OnDiskA
 else if (inSlice || 
isColumnBeforeSliceFinish(prefetchedCol))
 {
 blockColumns.addLast(prefetched.poll());
-gotSome = true;
 inSlice = true;
 }
 // if col is after slice, ignore
@@ -376,7 +373,7 @@ class IndexedSliceReader extends 
AbstractIteratorOnDiskAtom implements OnDiskA
 prefetched.poll();
 }
 }
-if (gotSome)
+if (!blockColumns.isEmpty())
 return true;
 }
 try



  1   2   >