GitHub - cloudwp/phpClickHouse: php ClickHouse wrapper

##Features

  • No dependency, only curl
  • Select parallel queries (asynchronous)
  • Parallelizing bulk inserts from CSV file
  • enable_http_compression, for bulk inserts
  • Find active host and check cluster
  • Select WHERE IN ( local csv file )
  • SQL conditions & template
  • tablesSize & databaseSize
  • listPartitions
  • pre production : dropPartition & dropOldPartitions

Start

Connect and select database:

$config=['host'=>'192.168.1.1','port'=>'8123','username'=>'default','password'=>''];
$db=new ClickHouseDB\Client($config);
$db->database('default');

Show tables:

print_r($db->showTables());

Create table:

$db->write('
CREATE TABLE IF NOT EXISTS summing_url_views (
event_date Date DEFAULT toDate(event_time),
event_time DateTime,
site_id Int32,
site_key String,
views Int32,
v_00 Int32,
v_55 Int32
) ENGINE = SummingMergeTree(event_date, (site_id,site_key, event_time, event_date), 8192)
';

Insert data:

$stat=$db->insert('summing_url_views',
    [
        [time(),'HASH1',2345,22,20,2],
        [time(),'HASH2',2345,12,9,3],
        [time(),'HASH3',5345,33,33,0],
        [time(),'HASH3',5345,55,0,55],
    ]
    ,
    ['event_time','site_key','site_id','views','v_00','v_55']
);

Select:

$statement=$db->select('SELECT * FROM summing_url_views LIMIT 2');

Work with Statement:

//Count select rows
$statement->count()
//Count all rows
$statement->countAll()
// fetch one row
$statement->fetchOne()
//get extremes min
print_r($statement->extremesMin());
//totals row
print_r($statement->totals());
//result all
print_r($statement->rows());
//totalTimeRequest
print_r($statement->totalTimeRequest());

Select result as tree:

$statement=$db->select('SELECT event_date,site_key,sum(views),avg(views) FROM summing_url_views WHERE site_id<3333 GROUP BY event_date,url_hash WITH TOTALS');
print_r($statement->rowsAsTree('event_date.site_key'));
/*
(
    [2016-07-18] => Array
        (
            [HASH2] => Array
                (
                    [event_date] => 2016-07-18
                    [url_hash] => HASH2
                    [sum(views)] => 12
                    [avg(views)] => 12
                )
            [HASH1] => Array
                (
                    [event_date] => 2016-07-18
                    [url_hash] => HASH1
                    [sum(views)] => 22
                    [avg(views)] => 22
                )
        )
)
 */

Drop table:

$db->write("DROP TABLE IF EXISTS summing_url_views");

Features

Select parallel queries (asynchronous)

$state1=$db->selectAsync('SELECT 1 as ping');
$state2=$db->selectAsync('SELECT 2 as ping');
//run
$db->executeAsync();
//result
print_r($state1->rows());
print_r($state2->fetchOne('ping'));

Parallelizing massive inserts from CSV file

$file_data_names=[
    '/tmp/clickHouseDB_test.1.data',
    '/tmp/clickHouseDB_test.2.data',
    '/tmp/clickHouseDB_test.3.data',
    '/tmp/clickHouseDB_test.4.data',
    '/tmp/clickHouseDB_test.5.data',
];
// insert all files
$stat=$db->insertBatchFiles('summing_url_views',
   $file_data_names,
   ['event_time','site_key','site_id','views','v_00','v_55']
);

Parallelizing errors

selectAsync without executeAsync

$select=$db->selectAsync('SELECT * FROM summing_url_views LIMIT 1');
$insert=$db->insertBatchFiles('summing_url_views',['/tmp/clickHouseDB_test.1.data'],['event_time']);
// 'Exception' with message 'Queue must be empty, before insertBatch,need executeAsync'

see example/exam5_error_async.php

Gzip & enable_http_compression

On fly read CSV file and compress zlib.deflate.

$db->settings()->max_execution_time(200);
$db->enableHttpCompression(true);

$result_insert=$db->insertBatchFiles('summing_url_views',$file_data_names,[......]);


foreach ($result_insert as $fileName=>$state)
{
    echo "$fileName => ".json_encode($state->info_upload())."\n";
}

see example/exam8_http_gzip_batch_insert.php

Find active host and check cluster

We use in the smi2, DNS Round-Robin.

Set host = "clickhouse.smi2.ru" is A record => [ xdb1.ch1.smi2.ru,xdb1.ch2.smi2.ru,xdb1.ch3.smi2.ru....]

function findActiveHostAndCheckCluster() - ping all IPs in DNS record

then random() select from active list

if dev. server (one IP or host) - no check

see example/exam6_check_cluster.php

$db=new ClickHouseDB\Client($config);
$change_host=true;
$time_out_second=1;
list($resultGoodHost,$resultBadHost,$selectHost)=$db->findActiveHostAndCheckCluster($time_out_second,$change_host);
echo "SelectHost:".$selectHost."\n";

tablesSize & databaseSize

Result in human size

print_r($db->databaseSize()));
print_r($db->tablesSize());
print_r($db->tableSize('summing_partions_views'));

Partitions

$count_result=2;
print_r($db->partitions('summing_partions_views',$count_result));

Drop partitions ( pre production )

$count_old_days=10;
print_r(
    $db->dropOldPartitions('summing_partions_views',$count_old_days);
)
// by `partition_id` 
print_r($db->dropPartition('summing_partions_views','201512'));

Select WHERE IN ( local csv file )

$file_name_data1="/tmp/temp_csv.txt"; // two column file [int,string]
$whereIn=new \ClickHouseDB\WhereInFile();
$whereIn->attachFile($file_name_data1,'namex',['site_id'=>'Int32','site_hash'=>'String'],\ClickHouseDB\WhereInFile::FORMAT_CSV);
$result=$db->select($sql,[],$whereIn);

//see example/exam7_where_in.php

Simple sql conditions & template

$input_params=[
  'select_date'=>['2000-10-10','2000-10-11','2000-10-12'],
  'limit'=>5,
  'from_table'=>'table'
];
$select='
SELECT * FROM {from_table}
WHERE
{if select_date}
event_date IN (:select_date)
{else}
event_date=today()
{/if}
{if limit}
LIMIT {limit}
{/if}
';

$statement=$db->selectAsync($select,$input_params);
echo $statement->sql();
/*
SELECT * FROM table
WHERE
event_date IN ('2000-10-10','2000-10-11','2000-10-12')
LIMIT 5
FORMAT JSON
*/


$input_params['select_date']=false;
$statement=$db->selectAsync($select,$input_params);
echo $statement->sql();
/*
SELECT * FROM table
WHERE
event_date=today()
LIMIT 5
FORMAT JSON
*/


$state1=$db->selectAsync('SELECT 1 as {key} WHERE {key}=:value',['key'=>'ping','value'=>1]);
// SELECT 1 as ping WHERE ping="1"

Todos

  • Write Tests
  • Write docs
  • Fix array insert in row
  • Normal exception
  • add/use composer ?
  • drop include ?
  • find ActiveHost & CheckCluster - how check cluster and replica ?

License

MIT