ClickHouse

Show partitions for database

SELECT DISTINCT 
    table, 
    partition
FROM system.parts
WHERE database = 'db'

Show partitions on cluster

SELECT DISTINCT 
    hostName(), 
    database,
    table, 
    partition
FROM cluster('cluster', 'system', 'parts')

Show partition sizes

SELECT 
    database,
    table,
    partition,
    formatReadableSize(sum(bytes)) AS size
FROM system.parts
GROUP BY 
    database,
    table,
    partition
ORDER BY sum(bytes) DESC
LIMIT 10

Show partition sizes on cluster

SELECT 
    hostName(),
    database,
    table,
    partition,
    formatReadableSize(sum(bytes)) AS size
FROM remote('ch-{1..100}.example.net', 'system.parts')
WHERE database = 'db' AND table = 'table_sharded'
GROUP BY 
    hostName(),
    database,
    table,
    partition
ORDER BY sum(bytes) DESC

Show column sizes

SELECT 
    column, 
    formatReadableSize(size)
FROM 
(
    SELECT 
        column, 
        sum(column_bytes_on_disk) AS size
    FROM system.parts_columns
    WHERE (database = 'db') AND (table = 'table_sharded') AND (partition = '202010')
    GROUP BY column
    ORDER BY size ASC
)

Show max_part_count_for_partition metric

SELECT 
    hostName(),
    *
FROM remote('ch-{1..100}.example.net', 'system.asynchronous_metrics')
WHERE metric = 'MaxPartCountForPartition'
ORDER BY hostName() ASC

Merges in tables with ETA

SELECT
    hostName(),
    database,
    table,
    round(elapsed, 0) AS time,
    round(progress, 4) AS percent,
    formatReadableTimeDelta((elapsed / progress) - elapsed) AS ETA,
    num_parts,
    result_part_name
FROM clusterAllReplicas('mycluster', 'system.merges')
ORDER BY (elapsed / percent) - elapsed ASC

Lookup executing query by substring

SELECT substring(query, position(query, 'interesting query part'), 20)
FROM system.processes
WHERE (query LIKE '%INSERT%') AND (user = 'production')

Drop many custom partitions

  1. Export via
SELECT partition
FROM system.parts
WHERE database='{db:String}'
  AND table='{table:String}'
GROUP BY partition
ORDER BY partition INTO outfile '{table:String}.tsv' 
FORMAT TSVRaw
clickhouse-client --param_db='db' --param_table='t'

FORMAT TSVRaw prevents escaping because drop partition query doesn’t need it.

  1. Edit file and leave only partitions to drop.

  2. Drop via bash loop

$ for p in $(cat table.tsv);do echo "$p"; clickhouse-client -h ch-1.example.net --query="alter table db.t on cluster c drop partition $p" ;done

Tail logs

Tail useful part of logs. Excludes stack traces, empty lines, version string. This will not work with trace level logs.

tail -F /var/log/clickhouse-server/clickhouse-server.log | grep -vP '\d\d?\. ' | grep -vP '^ *$' | grep -v 'version '

Distinct errors from log

grep '<Error>' clickhouse-server.log | cut -d' ' -f 7- | sort | uniq -c > errors.uniq.log

Distinct query errors from system.text_log

SELECT
    replaceAll(replaceRegexpAll(replaceRegexpOne(message, '\\(from \\[.*\\)', ''), '\n\\d+\\.[^\n].*', ''), '\n', '') AS m,
    count() AS freq,
    any(query_id) as query_id
FROM remote('ch-{1..100}.example.net', 'system.text_log')
WHERE (level = 'Error') AND (logger_name = 'executeQuery') AND (event_date >= '2021-08-31') AND (event_time >= '2021-08-31 14:45:00') AND (message NOT LIKE '%Received from%')
GROUP BY m
ORDER BY freq DESC