๐ฟ
Cassandra
Apache Cassandra: CQL, keyspaces, tables, indexes, TTL, partitioning, and cluster operations
cqlsh & Connection
Connect, authenticate, and navigate the CQL shell
bashยทConnect with cqlsh
# Connect to localhost cqlsh # Connect to a remote host cqlsh 10.0.0.1 9042 # Connect with credentials cqlsh 10.0.0.1 -u cassandra -p cassandra # Connect with SSL cqlsh 10.0.0.1 --ssl \ --certfile /path/to/ca.crt \ --userkey /path/to/client.key \ --usercert /path/to/client.crt # Execute a CQL file cqlsh -f schema.cql cqlsh 10.0.0.1 -f schema.cql # Execute inline CQL cqlsh -e "DESCRIBE KEYSPACES" # Set consistency level for the session cqlsh --request-timeout=30
sqlยทcqlsh shell commands
-- Show all keyspaces DESCRIBE KEYSPACES; -- Show all tables in current keyspace DESCRIBE TABLES; -- Show table schema DESCRIBE TABLE users; DESCRIBE TABLE my_keyspace.users; -- Show keyspace schema DESCRIBE KEYSPACE my_keyspace; -- Switch keyspace USE my_keyspace; -- Show cluster info DESCRIBE CLUSTER; -- Show current session info SHOW VERSION; SHOW HOST; -- Tracing โ show execution details for next query TRACING ON; SELECT * FROM users WHERE user_id = 'u1'; TRACING OFF; -- Paging PAGING ON; PAGING 50; -- set page size PAGING OFF; -- Timing โ show query execution time TIMING ON;
Keyspaces
Create, alter, and drop keyspaces with replication strategies
sqlยทCreate & alter keyspaces
-- SimpleStrategy (single DC / development only)
CREATE KEYSPACE my_keyspace
WITH replication = {
'class': 'SimpleStrategy',
'replication_factor': 3
};
-- NetworkTopologyStrategy (production, multi-DC)
CREATE KEYSPACE my_keyspace
WITH replication = {
'class': 'NetworkTopologyStrategy',
'dc1': 3,
'dc2': 2
}
AND durable_writes = true;
-- Create if not exists
CREATE KEYSPACE IF NOT EXISTS my_keyspace
WITH replication = {'class':'SimpleStrategy','replication_factor':1};
-- Alter replication factor
ALTER KEYSPACE my_keyspace
WITH replication = {
'class': 'NetworkTopologyStrategy',
'dc1': 3,
'dc2': 3
};
-- Drop keyspace
DROP KEYSPACE my_keyspace;
DROP KEYSPACE IF EXISTS my_keyspace;Tables
Create, alter, and drop tables with various key designs
sqlยทCreate tables
-- Simple table (partition key only) CREATE TABLE users ( user_id UUID PRIMARY KEY, name TEXT, email TEXT, age INT, active BOOLEAN, created TIMESTAMP ); -- Composite primary key (partition + clustering) CREATE TABLE orders_by_customer ( customer_id UUID, order_id TIMEUUID, status TEXT, total DECIMAL, created_at TIMESTAMP, PRIMARY KEY (customer_id, order_id) ) WITH CLUSTERING ORDER BY (order_id DESC); -- Compound partition key (spreads load across nodes) CREATE TABLE events_by_day ( sensor_id TEXT, date DATE, event_time TIMESTAMP, value DOUBLE, PRIMARY KEY ((sensor_id, date), event_time) ) WITH CLUSTERING ORDER BY (event_time DESC); -- Create if not exists CREATE TABLE IF NOT EXISTS users ( user_id UUID PRIMARY KEY, name TEXT );
sqlยทAlter & drop tables
-- Add a column
ALTER TABLE users ADD phone TEXT;
ALTER TABLE users ADD tags LIST<TEXT>;
-- Drop a column
ALTER TABLE users DROP phone;
-- Rename a column (only clustering columns)
ALTER TABLE orders_by_customer RENAME order_id TO id;
-- Change table options
ALTER TABLE users
WITH gc_grace_seconds = 86400
AND compaction = {
'class': 'LeveledCompactionStrategy',
'sstable_size_in_mb': 160
};
-- Truncate (delete all rows, keep schema)
TRUNCATE TABLE users;
TRUNCATE users;
-- Drop table
DROP TABLE users;
DROP TABLE IF EXISTS users;Data Types & Collections
Scalar types, collections, tuples, and UDTs
sqlยทScalar & UUID types
-- Common scalar types -- UUID โ random UUID: UUID DEFAULT uuid() -- TIMEUUID โ time-based UUID: TIMEUUID (sortable, embeds timestamp) -- TEXT โ UTF-8 string -- INT โ 32-bit integer -- BIGINT โ 64-bit integer -- FLOAT โ 32-bit float -- DOUBLE โ 64-bit float -- DECIMAL โ arbitrary precision -- BOOLEAN โ true/false -- TIMESTAMP โ date + time (ms precision) -- DATE โ date only (no time) -- TIME โ time of day (nanosecond precision) -- BLOB โ arbitrary bytes -- INET โ IPv4 or IPv6 address -- COUNTER โ distributed counter (special rules) -- DURATION โ ISO 8601 duration (e.g. 1h30m) -- Generate IDs in queries INSERT INTO users (user_id, name) VALUES (uuid(), 'Alice'); INSERT INTO events (event_id, ts) VALUES (now(), toTimestamp(now())); -- Extract timestamp from a TIMEUUID SELECT toTimestamp(event_id) FROM events; SELECT dateOf(event_id) FROM events; -- Cassandra 3.x
sqlยทCollections & tuples
-- List (ordered, duplicates allowed) CREATE TABLE profiles ( user_id UUID PRIMARY KEY, emails LIST<TEXT>, scores LIST<INT> ); -- Set (unordered, unique values) CREATE TABLE profiles ( user_id UUID PRIMARY KEY, tags SET<TEXT>, roles SET<TEXT> ); -- Map (key-value pairs) CREATE TABLE profiles ( user_id UUID PRIMARY KEY, attributes MAP<TEXT, TEXT>, counts MAP<TEXT, INT> ); -- Tuple (fixed-length, mixed types) CREATE TABLE readings ( device_id UUID PRIMARY KEY, location TUPLE<FLOAT, FLOAT> -- (lat, lng) ); -- User-Defined Type (UDT) CREATE TYPE address ( street TEXT, city TEXT, zip TEXT, country TEXT ); CREATE TABLE customers ( id UUID PRIMARY KEY, name TEXT, address FROZEN<address> );
CRUD โ INSERT, SELECT, UPDATE, DELETE
Insert, read, update, and delete rows
sqlยทINSERT
-- Basic insert
INSERT INTO users (user_id, name, email, age)
VALUES (uuid(), 'Alice', 'alice@example.com', 30);
-- Insert with TTL (row expires after N seconds)
INSERT INTO sessions (session_id, user_id, data)
VALUES (uuid(), 'u1', 'payload')
USING TTL 3600;
-- Insert with a specific timestamp (microseconds)
INSERT INTO events (event_id, data)
VALUES (now(), 'click')
USING TIMESTAMP 1718000000000000;
-- Insert only if row does NOT exist (lightweight transaction)
INSERT INTO users (user_id, email)
VALUES (uuid(), 'bob@example.com')
IF NOT EXISTS;
-- Insert into a collection
INSERT INTO profiles (user_id, tags, attributes)
VALUES (uuid(), {'admin','beta'}, {'plan':'pro','region':'us-east'});sqlยทSELECT
-- Fetch all columns for a partition
SELECT * FROM users WHERE user_id = 'some-uuid';
-- Specific columns
SELECT name, email FROM users WHERE user_id = 'some-uuid';
-- Clustering column range
SELECT * FROM orders_by_customer
WHERE customer_id = 'cid'
AND order_id >= minTimeuuid('2024-01-01')
AND order_id <= maxTimeuuid('2024-12-31');
-- Allow filtering (use sparingly โ full scan)
SELECT * FROM users WHERE age > 25 ALLOW FILTERING;
-- Limit results
SELECT * FROM orders_by_customer
WHERE customer_id = 'cid'
LIMIT 10;
-- Token-based pagination (consistent paging)
SELECT * FROM users WHERE token(user_id) > token('last-seen-id') LIMIT 100;
-- Count rows (expensive on large partitions)
SELECT COUNT(*) FROM orders_by_customer WHERE customer_id = 'cid';
-- TTL and write time of a column
SELECT name, TTL(name), WRITETIME(name) FROM users WHERE user_id = 'some-uuid';sqlยทUPDATE & DELETE
-- Update specific columns
UPDATE users SET name = 'Alice Smith', age = 31
WHERE user_id = 'some-uuid';
-- Update with TTL
UPDATE sessions USING TTL 1800
SET data = 'new-payload'
WHERE session_id = 'sid';
-- Conditional update (lightweight transaction)
UPDATE users SET name = 'Alice B.'
WHERE user_id = 'some-uuid'
IF name = 'Alice';
-- Increment a counter
UPDATE page_views SET views = views + 1
WHERE page_id = 'home';
-- Append to a list
UPDATE profiles SET emails = emails + ['new@example.com']
WHERE user_id = 'some-uuid';
-- Add to a set
UPDATE profiles SET tags = tags + {'vip'}
WHERE user_id = 'some-uuid';
-- Update a map entry
UPDATE profiles SET attributes['plan'] = 'enterprise'
WHERE user_id = 'some-uuid';
-- Delete a row
DELETE FROM users WHERE user_id = 'some-uuid';
-- Delete specific columns
DELETE name, email FROM users WHERE user_id = 'some-uuid';
-- Delete a range of clustering rows
DELETE FROM orders_by_customer
WHERE customer_id = 'cid'
AND order_id < minTimeuuid('2023-01-01');
-- Conditional delete (LWT)
DELETE FROM users WHERE user_id = 'some-uuid' IF EXISTS;Batches
LOGGED, UNLOGGED, and COUNTER batches
sqlยทBATCH statements
-- Logged batch (atomic โ use sparingly, adds coordinator overhead)
BEGIN BATCH
INSERT INTO users (user_id, name) VALUES (uuid(), 'Alice');
INSERT INTO users_by_email (email, user_id) VALUES ('alice@example.com', 'some-uuid');
APPLY BATCH;
-- Unlogged batch (no atomicity guarantee โ best for same-partition writes)
BEGIN UNLOGGED BATCH
UPDATE orders SET status = 'shipped' WHERE customer_id = 'c1' AND order_id = 'o1';
UPDATE orders SET status = 'shipped' WHERE customer_id = 'c1' AND order_id = 'o2';
APPLY BATCH;
-- Counter batch
BEGIN COUNTER BATCH
UPDATE page_views SET views = views + 1 WHERE page_id = 'home';
UPDATE page_views SET views = views + 1 WHERE page_id = 'about';
APPLY BATCH;
-- Batch with USING TIMESTAMP (all statements share the timestamp)
BEGIN BATCH USING TIMESTAMP 1718000000000000
INSERT INTO a (id, v) VALUES ('1', 'x');
INSERT INTO b (id, v) VALUES ('2', 'y');
APPLY BATCH;Indexes
Secondary indexes, SASI indexes, and materialized views
sqlยทSecondary & SASI indexes
-- Secondary index (avoid on high-cardinality columns)
CREATE INDEX ON users (email);
CREATE INDEX users_email_idx ON users (email);
-- Index on a collection value
CREATE INDEX ON profiles (VALUES(tags));
CREATE INDEX ON profiles (KEYS(attributes));
CREATE INDEX ON profiles (ENTRIES(attributes));
-- SASI index (Cassandra 3.4+) โ supports LIKE and range on text
CREATE CUSTOM INDEX ON users (name)
USING 'org.apache.cassandra.index.sasi.SASIIndex'
WITH OPTIONS = {
'mode': 'CONTAINS',
'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer',
'case_sensitive': 'false'
};
-- Query using SASI
SELECT * FROM users WHERE name LIKE '%alice%';
-- Drop an index
DROP INDEX users_email_idx;
DROP INDEX IF EXISTS users_email_idx;sqlยทMaterialized Views
-- Base table CREATE TABLE users ( user_id UUID PRIMARY KEY, email TEXT, country TEXT, name TEXT ); -- Materialized view partitioned by country CREATE MATERIALIZED VIEW users_by_country AS SELECT * FROM users WHERE country IS NOT NULL AND user_id IS NOT NULL PRIMARY KEY (country, user_id) WITH CLUSTERING ORDER BY (user_id ASC); -- Query the view SELECT * FROM users_by_country WHERE country = 'US'; -- Alter a materialized view (only table properties) ALTER MATERIALIZED VIEW users_by_country WITH gc_grace_seconds = 86400; -- Drop a materialized view DROP MATERIALIZED VIEW users_by_country;
TTL & Tombstones
Set, check, and manage time-to-live and tombstone tuning
sqlยทTTL operations
-- Insert with TTL (seconds)
INSERT INTO sessions (id, token) VALUES ('s1', 'abc') USING TTL 3600;
-- Update with TTL
UPDATE sessions USING TTL 1800 SET token = 'xyz' WHERE id = 's1';
-- Set default TTL on a table
CREATE TABLE cache (
key TEXT PRIMARY KEY,
value TEXT
) WITH default_time_to_live = 86400; -- 24 hours
ALTER TABLE cache WITH default_time_to_live = 43200;
-- Check remaining TTL on a column
SELECT TTL(token) FROM sessions WHERE id = 's1';
-- Check write timestamp (microseconds since epoch)
SELECT WRITETIME(token) FROM sessions WHERE id = 's1';
-- Remove TTL from a column (set to 0)
UPDATE sessions USING TTL 0 SET token = 'permanent' WHERE id = 's1';Consistency Levels
Read and write consistency level reference
sqlยทSet consistency in cqlsh
-- Check current consistency level CONSISTENCY; -- Set consistency level for the session CONSISTENCY ONE; CONSISTENCY QUORUM; CONSISTENCY ALL; CONSISTENCY LOCAL_QUORUM; CONSISTENCY EACH_QUORUM; -- Set serial consistency (for lightweight transactions) SERIAL CONSISTENCY LOCAL_SERIAL; /* Consistency level reference: Write levels: ANY โ at least 1 node (even hinted handoff) ONE โ 1 replica acknowledges TWO โ 2 replicas acknowledge THREE โ 3 replicas acknowledge QUORUM โ majority of replicas (RF/2 + 1) LOCAL_QUORUM โ quorum in local DC only EACH_QUORUM โ quorum in every DC ALL โ all replicas must acknowledge Read levels: ONE โ 1 replica responds TWO / THREE โ 2 / 3 replicas respond QUORUM โ majority of replicas LOCAL_QUORUM โ quorum in local DC ALL โ all replicas respond SERIAL โ linearizable read (LWT) LOCAL_SERIAL โ linearizable in local DC Strong consistency: write ALL + read ONE or write QUORUM + read QUORUM */
nodetool โ Cluster Operations
Monitor, repair, compact, and manage Cassandra nodes
bashยทCluster status & info
# Ring status โ shows node health, load, token ownership nodetool status # Status for a specific keyspace nodetool status my_keyspace # Show gossip info for all nodes nodetool gossipinfo # Show node info (uptime, load, heap usage) nodetool info # Show token ranges owned by this node nodetool ring # Show datacenter / rack topology nodetool describecluster # List all keyspaces and tables nodetool describering my_keyspace # Thread pool stats (useful to spot overload) nodetool tpstats # Compaction stats nodetool compactionstats # Show table-level stats (read/write latency, bloom filter, etc.) nodetool tablestats my_keyspace.users nodetool cfstats my_keyspace.users # older alias
bashยทRepair, flush & compact
# Full repair (anti-entropy โ run regularly) nodetool repair # Repair a specific keyspace nodetool repair my_keyspace # Repair a specific table nodetool repair my_keyspace users # Incremental repair (only unrepaired data) nodetool repair --incremental my_keyspace # Flush memtables to SSTables nodetool flush nodetool flush my_keyspace users # Major compaction (merge all SSTables โ can be slow) nodetool compact nodetool compact my_keyspace users # Compact a single partition key nodetool compact my_keyspace users some-partition-key # Scrub SSTables (detect and remove corrupted rows) nodetool scrub my_keyspace users # Upgrade SSTables to current format nodetool upgradesstables my_keyspace
bashยทNode lifecycle
# Drain node before shutdown (flushes, stops accepting writes) nodetool drain # Graceful shutdown after drain nodetool drain && sudo systemctl stop cassandra # Decommission (remove node from ring, stream data to others) nodetool decommission # Remove a dead node by host ID nodetool removenode <host-id> nodetool status # get host-id from here # Bootstrap a replacement node # Set in cassandra.yaml: replace_address_first_boot: <dead-node-ip> # Rebuild a node from peers (after replacing) nodetool rebuild -- <source-datacenter> # Move a node to a new token (rarely needed with vnodes) nodetool move <new-token> # Pause / resume compaction nodetool disableautocompaction my_keyspace nodetool enableautocompaction my_keyspace
Performance & Tuning
Compaction strategies, caching, and key design tips
sqlยทTable tuning options
-- Compaction strategy
ALTER TABLE users WITH compaction = {
'class': 'LeveledCompactionStrategy',
'sstable_size_in_mb': 160
};
ALTER TABLE time_series WITH compaction = {
'class': 'TimeWindowCompactionStrategy',
'compaction_window_unit': 'DAYS',
'compaction_window_size': 1
};
ALTER TABLE write_heavy WITH compaction = {
'class': 'SizeTieredCompactionStrategy',
'min_threshold': 4,
'max_threshold': 32
};
-- Caching
ALTER TABLE users WITH caching = {
'keys': 'ALL',
'rows_per_partition': '100'
};
-- Compression
ALTER TABLE users WITH compression = {
'class': 'LZ4Compressor',
'chunk_length_in_kb': 64
};
-- gc_grace_seconds (tombstone expiry โ must be > repair interval)
ALTER TABLE users WITH gc_grace_seconds = 864000; -- 10 days
-- Bloom filter false-positive chance (lower = more memory, fewer disk reads)
ALTER TABLE users WITH bloom_filter_fp_chance = 0.01;bashยทcassandra-stress (load testing)
# Basic write stress test cassandra-stress write n=1000000 \ -node 10.0.0.1 \ -rate threads=50 # Read stress test cassandra-stress read n=500000 \ -node 10.0.0.1 \ -rate threads=50 # Mixed workload (70% read / 30% write) cassandra-stress mixed ratio\(write=3,read=7\) n=1000000 \ -node 10.0.0.1 \ -rate threads=100 # Use a custom YAML profile cassandra-stress user \ profile=stress-profile.yaml \ ops\(insert=1,read=3\) \ n=500000 \ -node 10.0.0.1 # Print stats every 10 seconds cassandra-stress write n=1000000 -node 10.0.0.1 -log interval=10
Schema & Migration Patterns
Schema export, zero-downtime changes, and query-driven design tips
bashยทExport & compare schema
# Export full schema to a file cqlsh -e "DESCRIBE SCHEMA" > full_schema.cql # Export a single keyspace cqlsh -e "DESCRIBE KEYSPACE my_keyspace" > my_keyspace.cql # Export a single table cqlsh -e "DESCRIBE TABLE my_keyspace.users" > users_table.cql # Apply a schema file cqlsh -f schema_changes.cql # Apply with host and credentials cqlsh 10.0.0.1 -u cassandra -p cassandra -f migration.cql # Diff two schema files diff schema_before.cql schema_after.cql
sqlยทZero-downtime schema changes
-- Safe: add a nullable column (no backfill needed) ALTER TABLE users ADD last_login TIMESTAMP; -- Safe: add a new table for a new access pattern CREATE TABLE users_by_email ( email TEXT PRIMARY KEY, user_id UUID ); -- Safe: add an index (built asynchronously) CREATE INDEX IF NOT EXISTS ON users (country); -- Dual-write migration pattern: -- 1. Add new column to existing table ALTER TABLE orders ADD new_status TEXT; -- 2. Write to both old and new columns in application -- 3. Backfill new column (small batches to avoid pressure) -- 4. Switch reads to new column -- 5. Drop old column after gc_grace_seconds has passed ALTER TABLE orders DROP status; /* Key query-driven design rules: - Model tables around query patterns, not entities - Each query should hit exactly one partition - Avoid ALLOW FILTERING in production - Avoid large partitions (> 100 MB or > 100K rows) - Denormalize: duplicate data across tables is normal - Use TIMEUUID for time-ordered clustering keys */