Author: jbellis
Date: Fri Oct 28 15:58:32 2011
New Revision: 1190412

URL: http://svn.apache.org/viewvc?rev=1190412&view=rev
Log:
"defragment" rows for name-based queries under STCS
patch by jbellis; reviewed by slebresne for CASSANDRA-2503

Modified:
    cassandra/branches/cassandra-1.0/CHANGES.txt
    cassandra/branches/cassandra-1.0/doc/cql/CQL.textile
    
cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/CollationController.java

Modified: cassandra/branches/cassandra-1.0/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/CHANGES.txt?rev=1190412&r1=1190411&r2=1190412&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/CHANGES.txt (original)
+++ cassandra/branches/cassandra-1.0/CHANGES.txt Fri Oct 28 15:58:32 2011
@@ -1,3 +1,7 @@
+1.0.2
+ * "defragment" rows for name-based queries under STCS (CASSANDRA-2503)
+
+
 1.0.1
  * acquire references during index build to prevent delete problems
    on Windows (CASSANDRA-3314)

Modified: cassandra/branches/cassandra-1.0/doc/cql/CQL.textile
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/doc/cql/CQL.textile?rev=1190412&r1=1190411&r2=1190412&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/doc/cql/CQL.textile (original)
+++ cassandra/branches/cassandra-1.0/doc/cql/CQL.textile Fri Oct 28 15:58:32 
2011
@@ -1,305 +1,549 @@
+==<!-- This is only a suggestion, to demonstrate that the pre.syntax and 
pre.sample classes should probably be rendered differently. Render as you like. 
--><link rel="StyleSheet" href="CQL.css" type="text/css" media="screen">==
+
 h1. Cassandra Query Language (CQL) v2.0
 
 h2. Table of Contents
 
 {toc}
 
+h2. Syntax conventions
+
+To aid in specifying the CQL syntax, we will use the following conventions in 
this document:
+
+* Language rules will be given in a BNF(Backus-Naur Form)-like notation, 
looking like this:
+
+bc(syntax). 
+<Start> ::= <CQL_Statement>*
+          ;
+
+* Nonterminal symbols in syntax rules will have @<angle brackets>@.
+* Terminal symbols will be shown in @"double quotes"@.
+* As an additional shortcut notation to BNF, optional symbols (that can occur 
zero or one times) will be followed by a @?@ marker. Optional symbols that can 
occur zero or any number of times will be followed by a @*@ marker. Multiple 
symbols may be grouped together in @(parentheses)@ to signify that they are all 
optional or repeatable together.
+* In a few cases where meaning is more easily conveyed through prose, we 
explain a symbol and its expansion without BNF.
+* Sample code will also be shown in a code block:
+
+bc(sample). 
+SELECT sample_usage FROM cql;
+
+* References to keywords or pieces of CQL code in running text will be shown 
in a @fixed-width font@.
+
+h2. Overall syntax
+
+CQL consists of statements. As in SQL, some statements directly make changes 
to data, some look up data, and some change the way data is stored.
+
+All statements end with a semicolon.
+
+bc(syntax). 
+<CQL_Statement> ::= <statementBody> ";"
+                  ;
+<statementBody> ::= <useStatement>
+                  | <selectStatement>
+                  | <dataChangeStatement>
+                  | <schemaChangeStatement>
+                  ;
+<dataChangeStatement> ::= <insertStatement>
+                        | <updateStatement>
+                        | <batchStatement>
+                        | <deleteStatement>
+                        | <truncateStatement>
+                        ;
+<schemaChangeStatement> ::= <createKeyspaceStatement>
+                          | <createColumnFamilyStatement>
+                          | <createIndexStatement>
+                          | <dropKeyspaceStatement>
+                          | <dropColumnFamilyStatement>
+                          | <dropIndexStatement>
+                          | <alterTableStatement>
+                          ;
+
+String literals and identifiers (including keyspace and column family names) 
are case-sensitive, but CQL keywords are not. We show CQL keywords in this 
document in @UPPERCASE@ merely as a convention to aid readability.
+
+Literal values can be expressed in several ways in CQL.
+
+bc(syntax). 
+<term> ::= "KEY"
+         | <identifier>
+         | <stringLiteral>
+         | <integer>
+         | <float>
+         | <uuid>
+         ;
+
+* An @<identifier>@ is a letter followed by any sequence of letters, digits, 
or the underscore (@_@).
+* A @<stringLiteral>@ is encased in @'single quotes'@. A single quote itself 
can be represented in a string literal by doubling it, as in SQL: @'Single 
quote -> '' <-'@.
+* An @<integer>@ consists of an optional minus sign (@-@) followed by one or 
more digits (@0-9@).
+* A @<uuid>@ can be expressed in the canonical UUID form: 32 hex digits (@0-9@ 
or @a-f@, case insensitive), separated by dashes (@-@) after the 8th, 12th, 
16th, and 20th digits. Example: @01234567-0123-0123-0123-0123456789ab@
+* A @<float>@ is a series of one or more decimal digits, followed by a period 
(@.@), and one or more decimal digits following. Note that there is no 
provision for "e" notation, no optional @+@ sign, and the forms @.42@ and @42.@ 
are not accepted. Use @0.42@ and @42.0@.
+* Whitespace is not significant except to separate terms, and inside string 
literals.
+
+Comments in CQL can begin with a double dash (@--@) or a double slash (@//@) 
and extend to the end of the line. Multiline comments are enclosed in @/* ... 
*/@.
+
+h2(#storageTypes). Data Storage Types
+
+__Syntax:__
+
+bc(syntax). 
+<storageType> ::= "ascii"
+                | "bigint"
+                | "blob"
+                | "boolean"
+                | "counter"
+                | "decimal"
+                | "double"
+                | "float"
+                | "int"
+                | "text"
+                | "timestamp"
+                | "uuid"
+                | "varchar"
+                | "varint"
+                ;
+
+The following table gives additional information on the available data types.
+
+|_. type|_. description|
+|ascii|ASCII character string|
+|bigint|64-bit signed long|
+|blob|Arbitrary bytes (no validation)|
+|boolean|true or false|
+|counter|Counter column (64-bit long)|
+|decimal|Variable-precision decimal|
+|double|64-bit IEEE-754 floating point|
+|float|32-bit IEEE-754 floating point|
+|int|32-bit signed int|
+|text|UTF8 encoded string|
+|timestamp|A timestamp. See "Working with dates":#usingdates below for more 
information.|
+|uuid|Type 1 or type 4 UUID|
+|varchar|UTF8 encoded string|
+|varint|Arbitrary-precision integer|
+
+_Note: In addition to the recognized types listed above, it is also possible 
to supply a string containing the name of a class (a sub-class of 
@AbstractType@ loadable by Cassandra). The class name should either be fully 
qualified, or relative to the @org.apache.cassandra.db.marshal@ package._
+
+h3(#usingdates). Working with dates
+
+Values serialized with the @timestamp@ type are encoded as 64-bit signed 
integers representing a number of milliseconds since the standard base time 
known as "the epoch": January 1 1970 at 00:00:00 GMT.
+
+Timestamp types can be input in CQL as simple long integers, giving the number 
of milliseconds since the epoch, as defined above.
+
+Timestamp types can also be input as string literals in any of the following 
ISO 8601 formats, each representing the time and date Feb 3, 2011, at 04:05:00 
AM, GMT:
+
+* @2011-02-03 04:05+0000@
+* @2011-02-03 04:05:00+0000@
+* @2011-02-03T04:05+0000@
+* @2011-02-03T04:05:00+0000@
+
+The @+0000@ above is an RFC 822 4-digit time zone specification; @+0000@ 
refers to GMT. US Pacific Standard Time is @-0800@. The time zone may be 
omitted if desired-- the date will be interpreted as being in the time zone 
under which the coordinating Cassandra node is configured.
+
+* @2011-02-03 04:05@
+* @2011-02-03 04:05:00@
+* @2011-02-03T04:05@
+* @2011-02-03T04:05:00@
+
+There are clear difficulties inherent in relying on the time zone 
configuration being as expected, though, so it is recommended that the time 
zone always be specified for timestamps when feasible.
+
+The time of day may also be omitted, if the date is the only piece that 
matters:
+
+* @2011-02-03@
+* @2011-02-03+0000@
+
+In that case, the time of day will default to 00:00:00, in the specified or 
default time zone.
+
 h2. USE
 
-__Synopsis:__
+__Syntax:__
+
+bc(syntax). 
+<useStatement> ::= "USE" <term>
+                 ;
+
+__Sample:__
 
-bc. 
-USE <KEYSPACE>;
+bc(sample). 
+USE myApp;
 
-A @USE@ statement consists of the @USE@ keyword, followed by a valid keyspace 
name.  Its purpose is to assign the per-connection, current working keyspace.  
All subsequent keyspace-specific actions will be performed in the context of 
the supplied value.
+A @USE@ statement consists of the @USE@ keyword, followed by a valid keyspace 
name.  Its purpose is to assign the per-connection, current working keyspace.  
All subsequent keyspace-specific actions will be performed in the context of 
the keyspace selected, unless otherwise specified, until another USE statement 
is issued or the connection terminates.
 
 h2. SELECT
 
-__Synopsis:__
+__Syntax:__
 
-bc. 
-SELECT [FIRST N] [REVERSED] <SELECT EXPR> FROM <COLUMN FAMILY> [USING 
<CONSISTENCY>]
-        [WHERE <CLAUSE>] [LIMIT N];
+bc(syntax). 
+<selectStatement> ::= "SELECT" <whatToSelect>
+                        "FROM" ( <name> "." )? <name>
+                               ( "USING" "CONSISTENCY" <consistencylevel> )?
+                               ( "WHERE" <selectWhereClause> )?
+                               ( "LIMIT" <integer> )?
+                    ;
+<whatToSelect> ::= <term> ( "," <term> )*
+                 | ("FIRST" <integer> )? "REVERSED"? <columnRange>
+                 | "COUNT" "(" <countTarget> ")"
+                 ;
+<columnRange> ::= <term> ".." <term>
+                | "*"
+                ;
+<countTarget> ::= "*"
+                | "1"
+                ;
+<name> ::= <identifier>
+         | <stringLiteral>
+         | <integer>
+         ;
+<selectWhereClause> ::= <relation> ( "AND" <relation> )*
+                      | <term> "IN" "(" <term> ( "," <term> )* ")"
+                      ;
+<relation> ::= <term> <relationOperator> <term>
+             ;
+<relationOperator> ::= "=" | "<" | ">" | "<=" | ">="
+                     ;
+
+__Sample:__
+
+bc(sample). 
+SELECT Name, Occupation FROM People WHERE key IN (199, 200, 207);
+SELECT FIRST 3 REVERSED 'time199'..'time100' FROM Events;
+SELECT COUNT(*) FROM system.Migrations;
 
 A @SELECT@ is used to read one or more records from a Cassandra column family. 
It returns a result-set of rows, where each row consists of a key and a 
collection of columns corresponding to the query.
 
 h3. Specifying Columns
 
-bc. 
-SELECT [FIRST N] [REVERSED] name1, name2, name3 FROM ...
-SELECT [FIRST N] [REVERSED] name1..nameN FROM ...
+bc(sample). 
+SELECT col1, col2 FROM ...
+SELECT range_lo..range_hi FROM ...
+SELECT * FROM ...
+SELECT FIRST 4 REVERSED range_hi..range_lo FROM ...
 
-The SELECT expression determines which columns will appear in the results and 
takes the form of either a comma separated list of names, or a range. The range 
notation consists of a start and end column name separated by two periods 
(@..@). The set of columns returned for a range is start and end inclusive.
+The @SELECT@ expression determines which columns will appear in the results 
and can take a few different forms, as shown above. The simplest is a 
comma-separated list of column names. Note that column names in Cassandra can 
be specified with string literals or integers, in addition to identifiers.
 
-The @FIRST@ option accepts an integer argument and can be used to apply a 
limit to the number of columns returned per row.  When this limit is left unset 
it defaults to 10,000 columns.
+It is also possible to specify a range of column names. The range notation 
consists of start and end column names, separated by two periods (@..@). The 
set of columns returned for a range is start and end inclusive. A single star 
(@*@) may be used as a range to request "all columns".
 
-The @REVERSED@ option causes the sort order of the results to be reversed.
+When using a range, it is sometimes useful to limit the number of columns that 
can be returned as part of each row (since Cassandra is schemaless, it is not 
necessarily possible to determine ahead of time how many columns will be in the 
result set). To accomplish this, use the @FIRST@ clause with an integer to 
specify an upper limit on the number of columns returned per row. The default 
limit is 10,000 columns.
 
-It is worth noting that unlike the projection in a SQL SELECT, there is no 
guarantee that the results will contain all of the columns specified. This is 
because Cassandra is schema-less and there are no guarantees that a given 
column exists.
+The @REVERSED@ option causes the sort order of the columns returned to be 
reversed. This affects the @FIRST@ clause; when limiting the columns returned, 
the columns at the end of the range will be selected instead of the ones at the 
beginning of the range.
 
-h3. Column Family
-
-bc. 
-SELECT ... FROM <COLUMN FAMILY> ...
+A @SELECT@ expression may also be @COUNT(*)@. In this case, the result will be 
only one value: the number of rows which matched the query.
 
-The @FROM@ clause is used to specify the Cassandra column family applicable to 
a @SELECT@ query.
+It is worth noting that unlike the projection in a SQL SELECT, there is no 
guarantee that the results will contain all of the columns specified, because 
Cassandra is schemaless.
 
-bc.
-SELECT count(*) FROM <COLUMN FAMILY> ...
+h3. Column Family
 
-The @count@ aggregate function returns a single row, with a single column 
"count" whose value is the number of rows from the pre-aggregation resultset.
+bc(sample). 
+SELECT ... FROM MyApp.LocationSnapshots ...;
+SELECT ... FROM EventTimeline ...;
 
-Currently, @count@ is the only function supported by CQL.
+The @FROM@ clause is used to specify the Cassandra column family applicable to 
a @SELECT@ query. Unlike other operations on column families, the keyspace in 
which the column family exists may also be specified by giving its name before 
the column family name, and separating them by a dot (@.@). If the keyspace is 
not specified, the current keyspace will be used, as per normal.
 
 h3. Consistency Level
 
-bc. 
-SELECT ... [USING <CONSISTENCY>] ...
+bc(sample). 
+SELECT ... USING CONSISTENCY QUORUM;
 
 Following the column family clause is an optional "consistency level 
specification":#consistency.
 
 h3. Filtering rows
 
-bc. 
-SELECT ... WHERE <KEY> = keyname AND name1 = value1
-SELECT ... WHERE <KEY> >= startkey and <KEY> =< endkey AND name1 = value1
-SELECT ... WHERE <KEY> IN ('<key>', '<key>', '<key>', ...)
+bc(sample). 
+SELECT ... WHERE KEY = 11194251 AND startdate = '2011-10-08-0500';
+SELECT ... WHERE KEY >= 'AM' and KEY <= 'AZ' AND module = 17;
+SELECT ... WHERE keyalias IN ('key1', 'key2', 'key3', ...);
 
-The WHERE clause provides for filtering the rows that appear in results.  The 
clause can filter on a key name, or range of keys, and in the case of indexed 
columns, on column values.  Key filters are specified using the @KEY@ keyword 
or key alias name, a relational operator, (one of @=@, @>@, @>=@, @<@, and 
@<=@), and a term value.  When terms appear on both sides of a relational 
operator it is assumed the filter applies to an indexed column. With column 
index filters, the term on the left of the operator is the name, the term on 
the right is the value to filter __on__.
+The @WHERE@ clause provides for filtering the rows that appear in results.  
The clause can filter on a key name, or range of keys, and in the case of 
indexed columns, on column values.  Key filters are specified using the @KEY@ 
keyword or key alias name, followed by a relational operator (one of @=@, @>@, 
@>=@, @<@, and @<=@), and then a term value.  When terms appear on both sides 
of a relational operator it is assumed the filter applies to an indexed column. 
With column index filters, the term on the left of the operator must be the 
name of the indexed column, and the term on the right is the value to filter 
__on__.
 
-__Note: The greater-than and less-than operators (@>@ and @<@) result in key 
ranges that are inclusive of the terms. There is no supported notion of 
"strictly" greater-than or less-than; these operators are merely supported as 
aliases to @>=@ and @<=@.__  
+__Note: The greater-than and less-than operators (@>@ and @<@) result in key 
ranges that are inclusive of the terms. There is no supported notion of 
"strictly" greater-than or less-than; these operators are merely supported as 
aliases to @>=@ and @<=@.__
 
 h3. Limits
 
-bc. 
-SELECT ... WHERE <CLAUSE> [LIMIT N] ...
-
-Limiting the number of rows returned can be achieved by adding the @LIMIT@ 
option to a @SELECT@ expression. @LIMIT@ defaults to 10,000 when left unset.
-
-h2. ALTER TABLE
-
-_Synopsis:_
+bc(sample). 
+SELECT ... WHERE favoriteArtist = 'The Mighty Mighty Bosstones' LIMIT 90000;
 
-bc.
-ALTER TABLE <columnFamily> ADD <column> <validator>;
-ALTER TABLE <columnFamily> ALTER <column> TYPE <validator>;
-ALTER TABLE <columnFamily> DROP <column>;
-
-An @ALTER@ is used to manipulate with ColumnFamily columns. It allows you to 
add new columns, alter and drop existing columns. No results are returned.
+The @LIMIT@ option to a @SELECT@ expression limits the number of rows returned 
by a query. @LIMIT@ defaults to 10,000 when left unset.
 
 h2. INSERT
 
-_Synopsis:_
+__Syntax:__
 
-bc. 
-INSERT INTO <COLUMN FAMILY> (<KEY>, <col>, <col>, ...) VALUES (<key>, <val>, 
<val>, ...) [USING CONSISTENCY <LEVEL> [AND TIMESTAMP <timestamp>] [AND TTL 
<timeToLive>]];
+bc(syntax). 
+<insertStatement> ::= "INSERT" "INTO" <name>
+                               "(" <term> "," <term> ( "," <term> )* ")"
+                      "VALUES" "(" <term> "," <term> ( "," <term> )* ")"
+                      ( "USING" <usingOption> ( "AND" <usingOption> )* )?
+                    ;
+<usingOption> ::= "CONSISTENCY" <consistencylevel>
+                | "TIMESTAMP" <integer>
+                | "TTL" <integer>
+                ;
+
+__Sample:__
+
+bc(sample). 
+INSERT INTO NerdMovies (KEY, 11924)
+                VALUES ('Serenity', 'Nathan Fillion')
+      USING CONSISTENCY LOCAL_QUORUM AND TTL 86400;
 
-An @INSERT@ is used to write one or more columns to a record in a Cassandra 
column family. No results are returned. 
+An @INSERT@ is used to write one or more columns to a record in a Cassandra 
column family. No results are returned.
 
-If timestamp is unspecified, the server will generate a timestamp based on 
microseconds since 1970.
+The first column name in the @INSERT@ list must be the name of the column 
family key. Also, there must be more than one column name specified (Cassandra 
rows are not considered to exist with only a key and no associated columns).
 
-Unlike SQL, the semantics of @INSERT@ and @UPDATE@ are identical, in either 
case a record is created if none existed before, and updated when it does.  For 
information on modifiers and types, see the "@UPDATE@":#update section below.
+Unlike in SQL, the semantics of @INSERT@ and @UPDATE@ are identical. In either 
case a record is created if none existed before, and updated when it does.  For 
information on query modifiers and types, see the "@UPDATE@":#update section 
below.
 
 h2(#update). UPDATE
 
-_Synopsis:_
+__Syntax:__
 
-bc. 
-UPDATE <COLUMN FAMILY> [USING <CONSISTENCY> [AND TIMESTAMP <timestamp>] [AND 
TTL <timeToLive>]]
-        SET name1 = value1, name2 = value2 WHERE <KEY> = keyname;
-        
-An @UPDATE@ is used to write one or more columns to a record in a Cassandra 
column family. No results are returned. Key can be given using @KEY@ keyword or 
by alias set per ColumnFamily.
+bc(syntax). 
+<updateStatement> ::= "UPDATE" <name>
+                        ( "USING" <usingOption> ( "AND" <usingOption> )* )?
+                        "SET" <assignment> ( "," <assignment> )*
+                        "WHERE" <updateWhereClause>
+                    ;
+<assignment> ::= <term> "=" <term>
+               | <term> "=" <term> "+" <term>
+               | <term> "=" <term> "-" <term>
+               ;
+<updateWhereClause> ::= <term> "=" <term>
+                      | <term> "IN" "(" <term> ( "," <term> )* ")"
+                      ;
+
+__Sample:__
+
+bc(sample). 
+UPDATE NerdMovies USING CONSISTENCY ALL AND TTL 400
+       SET 'A 1194' = 'The Empire Strikes Back',
+           'B 1194' = 'Han Solo'
+     WHERE KEY = B70DE1D0-9908-4AE3-BE34-5573E5B09F14;
+UPDATE UserActionCounts SET total = total + 2 WHERE keyalias = 523;
 
-h3. Column Family
+An @UPDATE@ is used to write one or more columns to a record in a Cassandra 
column family. No results are returned. The row key can be specified using the 
@KEY@ keyword or by a key alias set per column family.
 
-bc. 
-UPDATE <COLUMN FAMILY> ...
+h3. Column Family
 
 Statements begin with the @UPDATE@ keyword followed by a Cassandra column 
family name.
 
 h3. Consistency Level
 
-bc. 
-UPDATE ... [USING <CONSISTENCY>] ...
+bc(sample). 
+UPDATE Foo USING CONSISTENCY EACH_QUORUM ...
 
-Following the column family identifier is an optional "consistency level 
specification":#consistency.
+Following the column family identifier is an optional @USING@ clause, which 
can specify the "consistency level":#consistency for the update, or the 
timestamp and/or the TTL for the new columns.
 
 h3. Timestamp
 
-bc.
-UPDATE ... [USING TIMESTAMP <timestamp>] ...
+bc(sample). 
+UPDATE Foo USING TIMESTAMP 1318452291034 ...
 
 @UPDATE@ supports setting a client-supplied optional timestamp for the modification.
 
 h3. TTL
 
-bc.
-UPDATE ... [USING TTL <timeToLive>] ...
+bc(sample). 
+UPDATE Foo USING TTL 6800 ...
 
-@UPDATE@ supports setting time to live (TTL) for each of the columns in 
@UPDATE@ statement.
+@UPDATE@ supports setting a time to live (TTL), in seconds, for each of the 
added columns.
 
 h3. Specifying Columns and Row
 
-bc. 
-UPDATE ... SET name1 = value1, name2 = value2 WHERE <KEY> = keyname;
-UPDATE ... SET name1 = value1, name2 = value2 WHERE <KEY> IN ('<key>', 
'<key>', ...)
+bc(sample). 
+UPDATE ... SET col1 = val1, col2 = val2 WHERE KEY = key1;
+UPDATE ... SET col3 = val3 WHERE KEY IN (key1, key2, key3);
+UPDATE ... SET col4 = 22 WHERE keyalias = key4;
 
-Rows are created or updated by supplying column names and values in term 
assignment format. Multiple columns can be set by separating the name/value 
pairs using commas.  Each update statement requires exactly one key to be 
specified using a WHERE clause and the @KEY@ keyword or key alias.
+Rows are created or updated by supplying column names and values, after the 
@SET@ keyword, in term assignment format. Multiple columns can be set by 
separating the name/value pairs using commas.  Each update statement requires a 
precise set of row keys to be specified using a @WHERE@ clause and the @KEY@ 
keyword or key alias.
 
 h3. Updating Counter Columns
 
-bc. 
-UPDATE ... SET name1 = name1 + <value> ...
-UPDATE ... SET name1 = name1 - <value> ...
+bc(sample). 
+UPDATE ... SET name1 = name1 + <value> ...;
+UPDATE ... SET name1 = name1 - <value> ...;
 
 Counter columns can be incremented or decremented by an arbitrary numeric 
value through the assignment of an expression that adds or subtracts the value.
 
 h2. DELETE
 
-_Synopsis:_
+__Syntax:__
 
-bc. 
-DELETE [COLUMNS] FROM <COLUMN FAMILY> [USING <CONSISTENCY>] WHERE <KEY> = 
keyname1
-DELETE [COLUMNS] FROM <COLUMN FAMILY> [USING <CONSISTENCY>] WHERE <KEY> IN 
(keyname1, keyname2);
+bc(syntax). 
+<deleteStatement> ::= "DELETE" ( <term> ( "," <term> )* )?
+                        "FROM" <name>
+                        ( "USING" <deleteOption> ( "AND" <deleteOption> )* )?
+                        "WHERE" <updateWhereClause>
+                    ;
+<deleteOption> ::= "CONSISTENCY" <consistencylevel>
+                 | "TIMESTAMP" <integer>
+                 ;
+
+__Sample:__
+
+bc(sample). 
+DELETE col1, col2, col3 FROM Planeteers USING CONSISTENCY ONE WHERE KEY = 
'Captain';
+DELETE FROM MastersOfTheUniverse WHERE KEY IN ('Man-At-Arms', 'Teela');
 
-A @DELETE@ is used to perform the removal of one or more columns from one or 
more rows. Key can be given using @KEY@ keyword or by alias set per 
ColumnFamily.
+A @DELETE@ is used to perform the removal of one or more columns from one or 
more rows. The key can be given using the @KEY@ keyword or by the key alias set 
per column family.
 
 h3. Specifying Columns
 
-bc. 
-DELETE [COLUMNS] ...
-
-Following the @DELETE@ keyword is an optional comma-delimited list of column 
name terms. When no column names are specified, the remove applies to the 
entire row(s) matched by the "WHERE clause":#deleterows
+Following the @DELETE@ keyword is an optional comma-delimited list of column 
name terms. When no column names are specified, the remove applies to the 
entire row(s) matched by the "WHERE clause":#deleterows.
 
 h3. Column Family
 
-bc. 
-DELETE ... FROM <COLUMN FAMILY> ...
-
-The column family name follows the list of column names.
+The column family name follows the list of column names and the keyword @FROM@.
 
 h3. Consistency Level
 
-bc. 
-DELETE ... [USING <CONSISTENCY>] ...
-
 Following the column family identifier is an optional "consistency level 
specification":#consistency.
 
 h3(#deleterows). Specifying Rows
 
-bc. 
-DELETE ... WHERE <KEY> = keyname1
-DELETE ... WHERE <KEY> IN (keyname1, keyname2)
+bc(sample). 
+DELETE ... WHERE KEY = 'some_key_value';
+DELETE ... WHERE keyalias IN (key1, key2);
 
-The @WHERE@ clause is used to determine which row(s) a @DELETE@ applies to. 
The first form allows the specification of a single keyname using the @KEY@ 
keyword (or by key alias) and the @=@ operator.  The second form allows a list 
of keyname terms to be specified using the @IN@ notation and a parenthesized 
list of comma-delimited keyname terms.
+The @WHERE@ clause is used to determine to which row(s) a @DELETE@ applies. 
The first form allows the specification of a single keyname using the @KEY@ 
keyword (or by key alias) and the @=@ operator.  The second form allows a list 
of keyname terms to be specified using the @IN@ notation and a parenthesized 
list of comma-delimited keyname terms.
 
-h2. BATCH
+h2. TRUNCATE
+
+__Syntax:__
+
+bc(syntax). 
+<truncateStatement> ::= "TRUNCATE" <name>
+                      ;
 
-_Synopsis:_
+__Sample:__
 
-bc.
-BATCH BEGIN BATCH [USING CONSISTENCY <LEVEL> [AND TIMESTAMP <timestamp>]]
-    INSERT or UPDATE or DELETE statements separated by semicolon or "end of 
line"
-APPLY BATCH
+bc(sample). 
+TRUNCATE super_important_data;
 
-@BATCH@ supports setting client-supplied optional global timestamp which will 
be used for each of the operations included in batch.
+@TRUNCATE@ accepts a single argument for the column family name, and 
permanently removes all data from said column family.
 
-A single consistency level is used for the entire batch, it appears after the 
@BEGIN BATCH@ statement, and uses the standard "consistency level 
specification":#consistency. Batch default to @CONSISTENCY.ONE@ when left 
unspecified.
+h2. BATCH
+
+__Syntax:__
 
-_NOTE: While there are no isolation guarantees,  @UPDATE@ queries are atomic 
within a give record._
+bc(syntax). 
+<batchStatement> ::= "BEGIN" "BATCH"
+                        ( "USING" <usingOption> ( "AND" <usingOption> )* )?
+                        <batchStatementMember> ( ";" <batchStatementMember> )*
+                     "APPLY" "BATCH"
+                   ;
+<batchStatementMember> ::= <insertStatement>
+                         | <updateStatement>
+                         | <deleteStatement>
+                         ;
 
-_Example:_
+__Sample:__
 
-bc.
-BEGIN BATCH USING CONSISTENCY QUORUM
+bc(sample). 
+BEGIN BATCH USING CONSISTENCY QUORUM AND TTL 8640000
   INSERT INTO users (KEY, password, name) VALUES ('user2', 'ch@ngem3b', 
'second user')
   UPDATE users SET password = 'ps22dhds' WHERE KEY = 'user2'
   INSERT INTO users (KEY, password) VALUES ('user3', 'ch@ngem3c')
   DELETE name FROM users WHERE key = 'user2'
   INSERT INTO users (KEY, password, name) VALUES ('user4', 'ch@ngem3c', 
'Andrew')
-APPLY BATCH
-     
-h2. TRUNCATE
+APPLY BATCH;
 
-_Synopsis:_
+@BATCH@ supports setting a client-supplied optional global timestamp which 
will be used for each of the operations included in the batch.
 
-bc. 
-TRUNCATE <COLUMN FAMILY>
+A single consistency level is used for the entire batch. It appears after the 
@BEGIN BATCH@ statement, and uses the standard "consistency level 
specification":#consistency. Batched statements default to @CONSISTENCY.ONE@ 
when left unspecified.
 
-Accepts a single argument for the column family name, and permanently removes 
all data from said column family.
+Only data modification statements (specifically, @UPDATE@, @INSERT@, and 
@DELETE@) are allowed in a @BATCH@ statement. @BATCH@ is _not_ an analogue for 
SQL transactions.
+
+_NOTE: While there are no isolation guarantees, @UPDATE@ queries are atomic 
within a given record._
 
 h2. CREATE KEYSPACE
 
-_Synopsis:_
+__Syntax:__
 
-bc. 
-CREATE KEYSPACE <NAME> WITH AND strategy_class = <STRATEGY>
-    AND strategy_options.<OPTION> = <VALUE> [AND strategy_options.<OPTION> = 
<VALUE>];
+bc(syntax). 
+<createKeyspaceStatement> ::= "CREATE" "KEYSPACE" <name>
+                                 "WITH" <optionName> "=" <optionVal>
+                                 ( "AND" <optionName> "=" <optionVal> )*
+                            ;
+<optionName> ::= <identifier>
+               | <optionName> ":" <identifier>
+               | <optionName> ":" <integer>
+               ;
+<optionVal> ::= <stringLiteral>
+              | <identifier>
+              | <integer>
+              ;
+
+__Sample:__
+
+bc(sample). 
+CREATE KEYSPACE Excelsior WITH strategy_class = 'SimpleStrategy'
+    AND strategy_options:replication_factor = 1;
+CREATE KEYSPACE Excalibur WITH strategy_class = 'NetworkTopologyStrategy'
+    AND strategy_options:DC1 = 1 AND strategy_options:DC2 = 3;
 
-The @CREATE KEYSPACE@ statement creates a new top-level namespace (aka 
"keyspace"). Valid names are any string constructed of alphanumeric characters 
and underscores, but must begin with a letter.  Properties such as replication 
strategy and count are specified during creation using the following accepted 
keyword arguments:
+The @CREATE KEYSPACE@ statement creates a new top-level namespace (aka 
"keyspace"). Valid names are any string constructed of alphanumeric characters 
and underscores. Names which do not work as valid identifiers or integers 
should be quoted as string literals. Properties such as replication strategy 
and count are specified during creation using the following accepted keyword 
arguments:
 
 |_. keyword|_. required|_. description|
+|strategy_class|yes|The name of the replication strategy class which should be 
used for the new keyspace. Some often-used classes are @SimpleStrategy@ and 
@NetworkTopologyStrategy@.|
 |strategy_options|no|Most strategies require additional arguments which can be 
supplied by appending the option name to the @strategy_options@ keyword, 
separated by a colon (@:@).  For example, a strategy option of "DC1" with a 
value of "1" would be specified as @strategy_options:DC1 = 1@; 
replication_factor for SimpleStrategy could be 
@strategy_options:replication_factor=3@.|
 
 h2. CREATE COLUMNFAMILY
 
-_Synopsis:_
+__Syntax:__
 
-bc. 
-CREATE COLUMNFAMILY <COLUMN FAMILY> (<KEY> <type> PRIMARY KEY [, name1 type, 
name2 type, ...]);
-CREATE COLUMNFAMILY <COLUMN FAMILY> (<KEY> <type> PRIMARY KEY [, name1 type, 
name2 type, ...])
-    [WITH keyword1 = arg1 [AND keyword2 = arg2 [AND ...]]];
+bc(syntax). 
+<createColumnFamilyStatement> ::= "CREATE" "COLUMNFAMILY" <name>
+                                    "(" <term> <storageType> "PRIMARY" "KEY"
+                                        ( "," <term> <storageType> )* ")"
+                                   ( "WITH" <identifier> "=" <cfOptionVal>
+                                     ( "AND" <identifier> "=" <cfOptionVal> )* 
)?
+                                ;
+<cfOptionVal> ::= <storageType>
+                | <identifier>
+                | <stringLiteral>
+                | <integer>
+                | <float>
+                ;
+
+__Sample:__
+
+bc(sample). 
+CREATE COLUMNFAMILY Fish (KEY blob PRIMARY KEY);
+CREATE COLUMNFAMILY FastFoodEatings (user text PRIMARY KEY)
+    WITH comparator=timestamp AND default_validation=int;
+CREATE COLUMNFAMILY MonkeyTypes (
+    KEY uuid PRIMARY KEY,
+    species text,
+    alias text,
+    population varint
+) WITH comment='Important biological records'
+   AND read_repair_chance = 1.0;
 
 @CREATE COLUMNFAMILY@ statements create new column family namespaces under the 
current keyspace. Valid column family names are strings of alphanumeric 
characters and underscores, which begin with a letter.
 
 h3(#keytypes). Specifying Key Type
 
-bc. 
-CREATE ... (<KEY> <type> PRIMARY KEY) ...
+bc(sample). 
+CREATE ... (KEY ascii PRIMARY KEY, ... ) ...
 
-When creating a new column family, you must specify key type.  The list of 
possible key types is identical to column comparators/validators, (see 
"Specifying Column Type":columntypes).  It's important to note that the key 
type must be compatible with the partitioner in use, for example 
@OrderPreservingPartitioner@ and @CollatingOrderPreservingPartitioner@ both 
require UTF-8 keys. If you use name instead of @KEY@ keyword, name alias will 
be set automatically.
+When creating a new column family, you must specify the key type. The list of 
possible types is identical to column comparators/validators (see "Data Storage 
Types":#storageTypes), except it probably does not make sense to use @counter@ 
for a key. It's important to note that the key type you use must be compatible 
with the partitioner in use. For example, @OrderPreservingPartitioner@ and 
@CollatingOrderPreservingPartitioner@ both require UTF-8 keys. If you use an 
identifier for the primary key name, instead of the @KEY@ keyword, a key alias 
will be set automatically.
 
-h3(#columntypes). Specifying Column Type (optional)
+h3. Specifying Column Types (optional)
 
-bc. 
-CREATE ... (<KEY> <type> PRIMARY KEY, name1 type, name2 type) ...
+bc(sample). 
+CREATE ... ( ... , name1 type1, name2 type2, ... ) ...
 
-It is possible to assign columns a type during column family creation.  
Columns configured with a type are validated accordingly when a write occurs. 
Column types are specified as a parenthesized, comma-separated list of column 
term and type pairs.  The list of recognized types are:
-
-|_. type|_. description|
-|ascii|ASCII character string|
-|bigint|8-byte long|
-|blob|Arbitrary bytes (no validation)|
-|boolean|true or false|
-|counter|Counter column, (8-byte long)|
-|decimal|Variable-precision decimal|
-|double|8-byte floating point|
-|float|4-byte floating point|
-|int|4-byte int|
-|text|UTF8 encoded string|
-|timestamp|Date + Time, encoded as 8 bytes since epoch|
-|uuid|Type 1, or type 4 UUID|
-|varchar|UTF8 encoded string|
-|varint|Arbitrary-precision integer|
-
-_Note: In addition to the recognized types listed above, it is also possible 
to supply a string containing the name of a class (a sub-class of 
@AbstractType@), either fully qualified, or relative to the 
@org.apache.cassandra.db.marshal@ package._
+It is possible to assign columns a type during column family creation. Columns 
configured with a type are validated accordingly when a write occurs, and 
intelligent CQL drivers and interfaces will be able to decode the column values 
correctly when receiving them. Column types are specified as a parenthesized, 
comma-separated list of column term and type pairs. See "Data Storage 
Types":#storageTypes for the list of recognized types.
 
 h3. Column Family Options (optional)
 
-bc. 
+bc(sample). 
 CREATE COLUMNFAMILY ... WITH keyword1 = arg1 AND keyword2 = arg2;
 
 A number of optional keyword arguments can be supplied to control the 
configuration of a new column family.
 
 |_. keyword|_. default|_. description|
-|comparator|text|Determines sorting and validation of column names. Valid 
values are identical to the types listed in "Specifying Column 
Type":#columntypes above.|
+|comparator|text|Determines the storage type of column names (which itself 
determines the sorting and validation of column names). Valid values are listed 
in the "Data Storage Types":#storageTypes table above.|
 |comment|none|A free-form, human-readable comment.|
 |row_cache_provider|SerializingCacheProvider if JNA is present, otherwise 
ConcurrentHashMapCacheProvider|A factory for the cache with which to back the 
row cache.|
 |row_cache_size|0|Number of rows whose entire contents to cache in memory.|
 |key_cache_size|200000|Number of keys per SSTable whose locations are kept in 
memory in "mostly LRU" order.|
 |read_repair_chance|1.0|The probability with which read repairs should be 
invoked on non-quorum reads.|
 |gc_grace_seconds|864000|Time to wait before garbage collecting tombstones 
(deletion markers).|
-|default_validation|text|Determines validation of column values. Valid values 
are identical to the types listed in "Specifying Column Type":#columntypes 
above.|
+|default_validation|text|Determines the default storage type of column values 
(which itself determines the validation for column values). This option does 
not affect the types of columns which were defined in a @CREATE COLUMNFAMILY@ 
statement-- only new columns. Valid values are listed in the "Data Storage 
Types":#storageTypes table above.|
 |min_compaction_threshold|4|Minimum number of SSTables needed to start a minor 
compaction.|
 |max_compaction_threshold|32|Maximum number of SSTables allowed before a minor 
compaction is forced.|
 |row_cache_save_period_in_seconds|0|Number of seconds between saving row 
caches.|
@@ -308,83 +552,135 @@ A number of optional keyword arguments c
 
 h2. CREATE INDEX
 
-_Synopsis:_
+__Syntax:__
+
+bc(syntax). 
+<createIndexStatement> ::= "CREATE" "INDEX" <identifier>? "ON"
+                               <name> "(" <term> ")"
+                         ;
+
+__Sample:__
+
+bc(sample). 
+CREATE INDEX userIndex ON NerdMovies (user);
+CREATE INDEX ON Mutants (abilityId);
+
+A @CREATE INDEX@ statement is used to create a new, automatic secondary index 
on the given column family, for the named column. A name for the index itself 
can be specified before the @ON@ keyword, if desired. A single column name must 
be specified inside the parentheses. It is not necessary for the column to 
exist on any current rows (Cassandra is schemaless), but the column must 
already have a type (specified during the @CREATE COLUMNFAMILY@, or added 
afterwards with @ALTER COLUMNFAMILY@).
+
+h2. DROP KEYSPACE
+
+__Syntax:__
+
+bc(syntax). 
+<dropKeyspaceStatement> ::= "DROP" "KEYSPACE" <name>
+                          ;
+
+__Sample:__
+
+bc(sample). DROP KEYSPACE MyTwitterClone;
 
-bc. CREATE INDEX [index_name] ON <column_family> (column_name);
+A @DROP KEYSPACE@ statement results in the immediate, irreversible removal of 
a keyspace, including all column families in it, and all data contained in 
those column families.
 
-A @CREATE INDEX@ statement is used to create a new, automatic secondary index 
for the named column.
+h2. DROP COLUMNFAMILY
+
+__Syntax:__
+
+bc(syntax). 
+<dropColumnFamilyStatement> ::= "DROP" "COLUMNFAMILY" <name>
+                              ;
+
+__Sample:__
+
+bc(sample). DROP COLUMNFAMILY worldSeriesAttendees;
+
+A @DROP COLUMNFAMILY@ statement results in the immediate, irreversible removal 
of a column family, including all data contained in it.
 
 h2. DROP INDEX
 
-_Synopsis:_
+__Syntax:__
+
+bc(syntax). 
+<dropIndexStatement> ::= "DROP" "INDEX" <name>
+                       ;
 
-bc. DROP INDEX <INDEX_NAME>
+__Sample:__
+
+bc(sample). DROP INDEX cf_col_idx;
 
 A @DROP INDEX@ statement is used to drop an existing secondary index.
-DROP INDEX <INDEX_NAME> statement will search all ColumnFamilies in the 
current Keyspace for specified index and delete it if found.
 
-h2. DROP
+h2. ALTER COLUMNFAMILY
+
+__Syntax:__
+
+bc(syntax). 
+<alterTableStatement> ::= "ALTER" "COLUMNFAMILY" <name> <alterInstructions>
+                        ;
+<alterInstructions> ::= "ALTER" <name> "TYPE" <storageType>
+                      | "ADD" <name> <storageType>
+                      | "DROP" <name>
+                      ;
+
+An @ALTER@ statement is used to manipulate column family column metadata. It 
allows you to add new columns, drop existing columns, or change the data 
storage type of existing columns. No results are returned.
+
+Specify the name of the column family to be changed after the @ALTER 
COLUMNFAMILY@ keywords, and the name of the column to be changed, added, or 
dropped after the keyword corresponding to the type of change desired (@ALTER@, 
@ADD@, @DROP@).
+
+h3. Changing the type of a typed column
 
-_Synopsis:_
+bc(sample). 
+ALTER COLUMNFAMILY addamsFamily ALTER lastKnownLocation TYPE uuid;
 
-bc. DROP <KEYSPACE|COLUMNFAMILY> namespace;
+@ALTER COLUMNFAMILY ... ALTER@ changes the expected storage type for a column. 
The column must already have a type in the column family metadata. The column 
may or may not already exist in current rows-- but be aware that no validation 
of existing data is done. The bytes stored in values for that column will 
remain unchanged, and if existing data is not deserializable according to the 
new type, this may cause your CQL driver or interface to report errors.
 
-@DROP@ statements result in the immediate, irreversible removal of keyspace 
and column family namespaces.
+h3. Adding a typed column
+
+bc(sample). 
+ALTER COLUMNFAMILY addamsFamily ADD gravesite varchar;
+
+The @ALTER COLUMNFAMILY ... ADD@ variant adds a typed column to a column 
family. The column must not already have a type in the column family metadata. 
The same warnings from the above @ALTER@ section, about there being no 
validation of existing data, apply here as well.
+
+h3. Dropping a typed column
+
+bc(sample). 
+ALTER COLUMNFAMILY addamsFamily DROP gender;
+
+An @ALTER COLUMNFAMILY ... DROP@ statement removes the type of a column from 
the column family metadata. Note that this does _not_ remove the column from 
current rows; it just removes the metadata saying that the bytes stored under 
that column are expected to be deserializable according to a certain type.
 
 h2. Common Idioms
 
 h3(#consistency). Specifying Consistency
 
-bc. 
-... USING <CONSISTENCY> ...
+bc(syntax). 
+<consistency> ::= "ANY"
+                | "ONE"
+                | "QUORUM"
+                | "ALL"
+                | "LOCAL_QUORUM"
+                | "EACH_QUORUM"
+                ;
+
+bc(sample). 
+... USING CONSISTENCY LOCAL_QUORUM ...
 
-Consistency level specifications are made up the keyword @USING@, followed by 
a consistency level identifier. Valid consistency levels are as follows:
+Consistency level specifications are made up of the keywords @USING CONSISTENCY@, 
followed by a consistency level identifier. Valid consistency level identifiers 
are as listed above. When not specified, @USING CONSISTENCY ONE@ is the default.
 
-* @CONSISTENCY ANY@
-* @CONSISTENCY ONE@ (default)
-* @CONSISTENCY QUORUM@
-* @CONSISTENCY ALL@
-* @CONSISTENCY LOCAL_QUORUM@
-* @CONSISTENCY EACH_QUORUM@
-
-h3(#terms). Term specification
-
-Terms are used in statements to specify things such as keyspaces, column 
families, indexes, column names and values, and keyword arguments.  The rules 
governing term specification are as follows:
-
-* Any single quoted string literal (example: @'apple'@).
-* Unquoted alpha-numeric strings that begin with a letter (example: @carrot@).
-* Unquoted numeric literals (example: @100@).
-* UUID strings in hyphen-delimited hex notation (example: 
@1438fc5c-4ff6-11e0-b97f-0026c650d722@). 
-
-Terms which do not conform to these rules result in an exception.
-
-How column name/value terms are interpreted is determined by the configured 
type.
-
-|_. type|_. term|
-|ascii|Any string which can be decoded using ASCII charset|
-|text / varchar|Any string which can be decoded using UTF8 charset|
-|uuid|Standard UUID string format (hyphen-delimited hex notation)|
-|uuid|Standard UUID string format (hyphen-delimited hex notation)|
-|uuid|The string @now@, to represent a type-1 (time-based) UUID with a 
date-time component based on the current time|
-|uuid|Numeric value representing milliseconds since epoch|
-|uuid|An "iso8601 timestamp":http://en.wikipedia.org/wiki/8601|
-|int|Integer value capable of fitting in 8 bytes (same as bigint)|
-|bigint|Integer value capable of fitting in 8 bytes|
-|varint|Integer value of arbitrary size|
-|bytea|Hex-encoded strings (converted directly to the corresponding bytes)|
+Consult your Cassandra documentation for information about how consistency 
levels work.
 
 h1. Versioning
 
 Versioning of the CQL language adheres to the "Semantic 
Versioning":http://semver.org guidelines.  Versions take the form X.Y.Z where 
X, Y, and Z are integer values representing major, minor, and patch level 
respectively.  There is no correlation between Cassandra release versions and 
the CQL language version.
 
 |_. version|_. description|
-|Patch|The patch version is incremented when bugs are fixed.|
-|Minor|Minor version increments occur when new, but backward compatible, 
functionality is introduced.|
 |Major|The major version _must_ be bumped when backward incompatible changes 
are introduced.  This should rarely (if ever) occur.|
+|Minor|Minor version increments occur when new, but backward compatible, 
functionality is introduced.|
+|Patch|The patch version is incremented when bugs are fixed.|
 
 h1. Changes
 
-pre. 
+pre.. 
+Wed, 12 Oct 2011 16:53:00 -0500 - paul cannon
+ * Rework whole doc, adding syntax specifics and additional explanations
+
 Fri, 09 Sep 2011 11:43:00 -0500 - Jonathan Ellis
  * add int data type
 

Modified: 
cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/CollationController.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/CollationController.java?rev=1190412&r1=1190411&r2=1190412&view=diff
==============================================================================
--- 
cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/CollationController.java
 (original)
+++ 
cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/CollationController.java
 Fri Oct 28 15:58:32 2011
@@ -19,6 +19,7 @@
  */
 package org.apache.cassandra.db;
 
+import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.*;
 
@@ -28,6 +29,7 @@ import org.slf4j.LoggerFactory;
 
 import org.apache.cassandra.db.columniterator.IColumnIterator;
 import org.apache.cassandra.db.columniterator.SimpleAbstractColumnIterator;
+import org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy;
 import org.apache.cassandra.db.filter.NamesQueryFilter;
 import org.apache.cassandra.db.filter.QueryFilter;
 import org.apache.cassandra.db.marshal.CounterColumnType;
@@ -35,7 +37,6 @@ import org.apache.cassandra.io.sstable.S
 import org.apache.cassandra.io.sstable.SSTableReader;
 import org.apache.cassandra.io.util.FileUtils;
 import org.apache.cassandra.utils.CloseableIterator;
-import org.apache.cassandra.utils.IntervalTree.Interval;
 
 public class CollationController
 {
@@ -149,6 +150,21 @@ public class CollationController
             ColumnFamily returnCF = container.cloneMeShallow();
             filter.collateColumns(returnCF, 
Collections.singletonList(toCollate), cfs.metadata.comparator, gcBefore);
 
+            // "hoist up" the requested data into a more recent sstable
+            if (sstablesIterated >= cfs.getMinimumCompactionThreshold() && 
cfs.getCompactionStrategy() instanceof SizeTieredCompactionStrategy)
+            {
+                RowMutation rm = new RowMutation(cfs.table.name, new 
Row(filter.key, returnCF));
+                try
+                {
+                    rm.applyUnsafe(); // skipping commitlog is fine since 
we're just de-fragmenting existing data
+                }
+                catch (IOException e)
+                {
+                    // log and allow the result to be returned
+                    logger.error("Error re-writing read results", e);
+                }
+            }
+
             // Caller is responsible for final removeDeletedCF.  This is 
important for cacheRow to work correctly:
             return returnCF;
         }


Reply via email to