Source code for benchbox.platforms.clickhouse.adapter

"""Primary adapter for ClickHouse platforms."""

from __future__ import annotations

import logging

from benchbox.platforms.base import DriverIsolationCapability, PlatformAdapter
from benchbox.utils.dependencies import check_platform_dependencies, get_dependency_error_message

from .deployment_mode import CLICKHOUSE_DEPLOYMENT_MODE_VALUES, resolve_clickhouse_deployment_mode
from .diagnostics import ClickHouseDiagnosticsMixin
from .metadata import ClickHouseMetadataMixin
from .setup import ClickHouseSetupMixin
from .tuning import ClickHouseTuningMixin
from .workload import ClickHouseWorkloadMixin

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class ClickHouseAdapter(
    ClickHouseMetadataMixin,
    ClickHouseSetupMixin,
    ClickHouseDiagnosticsMixin,
    ClickHouseWorkloadMixin,
    ClickHouseTuningMixin,
    PlatformAdapter,
):
    """Top-level ClickHouse adapter that ties the feature mixins together.

    The class itself only resolves and validates the deployment mode, checks
    that the Python package needed for that mode is importable, and then hands
    the configuration to the matching ``_setup_*_mode`` method.

    Known Limitations:
        Some TPC-DS queries are known to be incompatible:

        - Query 14: INTERSECT DISTINCT requires manual alias addition
        - Query 30: Query plan cloning not implemented for aggregation
          steps (Code: 48)
        - Query 81: Query plan cloning not implemented for aggregation
          steps (Code: 48)

        They can fail even after query transformations are applied; fixing
        them needs a manual rewrite or improvements in the ClickHouse engine.
    """

    driver_isolation_capability = DriverIsolationCapability.NOT_FEASIBLE

    # Benchmark name -> query ids that may still fail after transformations.
    KNOWN_INCOMPATIBLE_QUERIES = {
        "tpcds": [14, 30, 81],
    }

    def __init__(self, **config):
        """Create the adapter and prepare it for the requested deployment mode.

        Args:
            **config: Adapter configuration. It is forwarded unchanged to the
                parent initializer, to ``resolve_clickhouse_deployment_mode``
                and to the mode-specific setup method. The private
                ``_is_cloud_subclass`` key (default ``False``) additionally
                allows the ``"cloud"`` mode.

        Raises:
            ValueError: The resolved deployment mode is not an accepted mode.
            ImportError: The package required by the selected mode is missing.
        """
        super().__init__(**config)
        self._dialect = "clickhouse"

        # The mode comes from the factory (colon syntax, e.g. clickhouse:local).
        # "cloud" is a separate first-class platform (clickhouse-cloud);
        # ClickHouseCloudAdapter sets ``_is_cloud_subclass`` so that it is
        # accepted here as well.
        from_cloud_adapter = config.get("_is_cloud_subclass", False)
        self.deployment_mode = resolve_clickhouse_deployment_mode(config, allow_cloud=from_cloud_adapter)

        accepted_modes = {*CLICKHOUSE_DEPLOYMENT_MODE_VALUES}
        if from_cloud_adapter:
            accepted_modes |= {"cloud"}

        mode = self.deployment_mode
        if mode not in accepted_modes:
            listed_modes = ", ".join(sorted(accepted_modes))
            raise ValueError(f"Invalid ClickHouse deployment mode '{mode}'. Valid modes: {listed_modes}")

        # Each mode first verifies its client package, then runs its own setup.
        if mode == "server":
            driver_present, missing_packages = check_platform_dependencies("clickhouse", ["clickhouse-driver"])
            if not driver_present:
                raise ImportError(get_dependency_error_message("clickhouse", missing_packages))
            self._setup_server_mode(config)
            return

        if mode == "local":
            from importlib.util import find_spec

            if find_spec("chdb") is None:
                chdb_help = (
                    "ClickHouse local mode requires chDB but it is not installed.\n"
                    "To resolve this issue:\n"
                    " 1. Install chDB: uv add chdb\n"
                    " 2. Or switch to server mode: --platform clickhouse:server\n"
                    " 3. Or use ClickHouse Cloud: --platform clickhouse-cloud\n"
                    " 4. Or use a different platform (e.g., DuckDB)\n"
                    "\nFor more information about chDB, visit: https://github.com/chdb-io/chdb"
                )
                raise ImportError(chdb_help)
            self._setup_local_mode(config)
            return

        if mode == "cloud":
            # Only reachable when ClickHouseCloudAdapter enabled cloud mode.
            from importlib.util import find_spec

            if find_spec("clickhouse_connect") is None:
                # NOTE(review): this hint says "clickhouse-local" while the
                # other hints use colon syntax ("clickhouse:server") — confirm
                # which platform name the CLI actually accepts.
                connect_help = (
                    "ClickHouse Cloud requires clickhouse-connect but it is not installed.\n"
                    "To resolve this issue:\n"
                    " 1. Install ClickHouse Cloud extra: uv add benchbox --extra clickhouse-cloud\n"
                    " 2. Or use local mode: --platform clickhouse-local\n"
                    "\nFor more information, visit: https://clickhouse.com/docs/en/integrations/python"
                )
                raise ImportError(connect_help)
            self._setup_cloud_mode(config)
# Names exported by ``from ... import *``; only the adapter class is public.
__all__ = ["ClickHouseAdapter"]