Source code for benchbox.clickbench

"""ClickBench (ClickHouse Analytics Benchmark) implementation.

Copyright 2026 Joe Harris / BenchBox Project

Licensed under the MIT License. See LICENSE file in the project root for details.
"""

from pathlib import Path
from typing import Any, Optional, Union

from benchbox.base import BaseBenchmark
from benchbox.core.clickbench.benchmark import ClickBenchBenchmark



[docs]
class ClickBench(BaseBenchmark):
    """Public entry point for the ClickBench (ClickHouse Analytics Benchmark).

    Exposes data generation, schema/DDL access, and the 43 benchmark queries
    (Q1-Q43) designed for testing analytical database performance with web
    analytics data.

    Most methods forward to an implementation object read from ``self._impl``.
    ``__init__`` requests a ``ClickBenchBenchmark`` for that role through
    ``_initialize_benchmark_implementation``, a helper defined outside this
    class (presumably the one that assigns ``self._impl`` -- confirm in
    ``BaseBenchmark``).

    Official specification: https://github.com/ClickHouse/ClickBench
    Results dashboard: https://benchmark.clickhouse.com/
    """


[docs]
    def __init__(
        self,
        scale_factor: float = 1.0,
        output_dir: Optional[Union[str, Path]] = None,
        **kwargs,
    ) -> None:
        """Create a ClickBench benchmark wrapper.

        Args:
            scale_factor: Benchmark scale factor (1.0 = ~1M records for testing).
            output_dir: Directory that generated data files are written to.
            **kwargs: Extra implementation-specific options; forwarded unchanged
                to both the base-class initializer and the implementation setup.
        """
        # Base-class setup runs first, with exactly the arguments received.
        super().__init__(scale_factor=scale_factor, output_dir=output_dir, **kwargs)

        # Hand the concrete implementation class to the shared setup helper;
        # scale factor and output directory are passed positionally.
        impl_class = ClickBenchBenchmark
        self._initialize_benchmark_implementation(impl_class, scale_factor, output_dir, **kwargs)



[docs]
    def generate_data(self) -> list[Union[str, Path]]:
        """Produce the ClickBench data files.

        Returns:
            Paths of the generated data files. When the implementation reports
            its output as a dict, only the dict's values are returned (as a
            list); any other result is handed back untouched.
        """
        generated = self._impl.generate_data()
        if isinstance(generated, dict):
            # Callers only want the paths, not the keys they were filed under.
            return list(generated.values())
        return generated



[docs]
    def get_queries(self, dialect: Optional[str] = None) -> dict[str, str]:
        """Return every ClickBench benchmark query.

        Args:
            dialect: SQL dialect to translate into (e.g., 'duckdb', 'bigquery',
                'snowflake'). ``None`` leaves the queries in their original format.

        Returns:
            Mapping of query ID (Q1-Q43) to query text.
        """
        # Straight pass-through; the dialect is always forwarded by keyword.
        all_queries = self._impl.get_queries(dialect=dialect)
        return all_queries



[docs]
    def get_query(self, query_id: Union[int, str], *, params: Optional[dict[str, Any]] = None) -> str:
        """Return a single ClickBench benchmark query.

        Args:
            query_id: Identifier of the wanted query (Q1-Q43).
            params: Optional parameters used to customize the query.

        Returns:
            The query text.

        Raises:
            ValueError: If query_id is invalid.
        """
        # The ID goes through positionally, the parameters by keyword.
        query_text = self._impl.get_query(query_id, params=params)
        return query_text



[docs]
    def get_schema(self) -> list[dict]:
        """Return the ClickBench schema.

        Returns:
            List of dictionaries describing the tables in the schema. The
            implementation supplies a mapping; only its values are returned.
        """
        schema_map = self._impl.get_schema()
        return [*schema_map.values()]



[docs]
    def get_create_tables_sql(self, dialect: str = "standard", tuning_config=None) -> str:
        """Return the SQL that creates all ClickBench tables.

        Args:
            dialect: SQL dialect to generate for.
            tuning_config: Unified tuning configuration for constraint settings.

        Returns:
            SQL script for creating all tables.
        """
        # Both options are forwarded by keyword, in the same order as received.
        ddl_options = {"dialect": dialect, "tuning_config": tuning_config}
        return self._impl.get_create_tables_sql(**ddl_options)



[docs]
    def translate_query(self, query_id: str, dialect: str) -> str:
        """Translate one ClickBench query into another SQL dialect.

        Args:
            query_id: ID of the query to translate (Q1-Q43).
            dialect: Target SQL dialect (postgres, mysql, bigquery, etc.).

        Returns:
            The translated query string.

        Raises:
            ValueError: If the query_id is invalid or the dialect is not supported.
            ImportError: If sqlglot is not installed.
        """
        # No ClickBench-specific handling here: the base class does the work.
        translated = super().translate_query(query_id, dialect)
        return translated



[docs]
    def get_query_categories(self) -> dict[str, list[str]]:
        """Return the ClickBench queries grouped by category.

        Returns:
            Mapping of category name to the list of query IDs in that category.
        """
        # The grouping itself is supplied by the implementation object.
        categories = self._impl.get_query_categories()
        return categories