Source code for benchbox.utils.scale_factor

"""Scale factor formatting utilities for consistent naming across BenchBox.

Copyright 2026 Joe Harris / BenchBox Project

Licensed under the MIT License. See LICENSE file in the project root for details.
"""



[docs]
def format_scale_factor(scale_factor: float) -> str:
    """Format scale factor for filenames and identifiers.

    Rules:
    - Values >= 1: No leading zero (sf1, sf10, sf100)
    - Values < 1: Leading zero + decimal digits (sf01, sf001, sf0001)
    - Leading zero implies the value is < 1
    - Values < 1 are rounded to 10 decimal places before formatting, so a
      value that rounds up to 1 is reported as sf1 and a value that rounds
      down to 0 is reported as sf0

    Examples:
    - 1.0 -> sf1 (no leading zero)
    - 0.1 -> sf01 (leading zero implies < 1)
    - 0.01 -> sf001 (leading zero implies < 1)
    - 0.001 -> sf0001 (leading zero implies < 1)
    - 10 -> sf10 (no leading zero)
    - 1.5 -> sf15 (remove decimal point, no leading zero)
    - 0 -> sf0

    Args:
        scale_factor: The scale factor to format. Must be finite and >= 0.

    Returns:
        Formatted scale factor string (e.g., "sf1", "sf01", "sf001")

    Raises:
        ValueError: If scale_factor is negative, infinite, or NaN.
    """
    # One chained comparison rejects negatives, +/-inf and NaN: every
    # comparison involving NaN is False, so NaN fails the range check too.
    # Without this guard a negative value would drop its sign and collide
    # with the name of the matching positive scale factor.
    if not (0 <= scale_factor < float("inf")):
        raise ValueError(
            f"scale_factor must be a finite, non-negative number, got {scale_factor!r}"
        )

    if scale_factor >= 1:
        if scale_factor == int(scale_factor):
            # Integer values >= 1: no leading zero
            return f"sf{int(scale_factor)}"
        # Non-integer values >= 1: remove decimal point
        # 1.5 -> sf15, 2.25 -> sf225
        return "sf" + f"{scale_factor}".replace(".", "")

    # Values < 1: fixed-point formatting avoids scientific notation for
    # small values (e.g. 1e-05) and always yields "<whole>.<10 digits>".
    whole, _, fraction = f"{scale_factor:.10f}".partition(".")
    if whole != "0":
        # Rounding to 10 decimals pushed a value just below 1 up to
        # "1.0000000000"; report it as sf1 rather than dropping the whole
        # part and returning a misleading "sf0".
        return f"sf{whole}"

    # Leading zero marks "< 1"; trailing zeros carry no information.
    # Exactly 0 leaves an empty fraction and therefore yields "sf0".
    return f"sf0{fraction.rstrip('0')}"




[docs]
def format_benchmark_name(benchmark_name: str, scale_factor: float) -> str:
    """Build a benchmark identifier of the form ``<benchmark>_<sf-tag>``.

    Args:
        benchmark_name: Name of the benchmark (e.g., "tpch", "tpcds")
        scale_factor: Scale factor value, rendered via format_scale_factor

    Returns:
        Benchmark name joined to the scale factor tag with an underscore
        (e.g., "tpch_sf1", "tpcds_sf01")
    """
    return f"{benchmark_name}_{format_scale_factor(scale_factor)}"




[docs]
def format_data_directory(benchmark_name: str, scale_factor: float) -> str:
    """Build a data directory name of the form ``<benchmark>_<sf-tag>_data``.

    Args:
        benchmark_name: Name of the benchmark
        scale_factor: Scale factor value, rendered via format_scale_factor

    Returns:
        Directory name carrying the scale factor tag and a "_data" suffix
        (e.g., "tpch_sf1_data", "tpcds_sf01_data")
    """
    scale_tag = format_scale_factor(scale_factor)
    directory_name = f"{benchmark_name}_{scale_tag}_data"
    return directory_name




[docs]
def format_schema_name(benchmark_name: str, scale_factor: float) -> str:
    """Build a database schema name of the form ``<benchmark>_<sf-tag>``.

    Args:
        benchmark_name: Name of the benchmark
        scale_factor: Scale factor value, rendered via format_scale_factor

    Returns:
        Schema name joined to the scale factor tag with an underscore
        (e.g., "tpch_sf1", "tpcds_sf01")
    """
    scale_tag = format_scale_factor(scale_factor)
    return f"{benchmark_name}_{scale_tag}"