NOTE(review): rebuilt from a whitespace-collapsed copy of this patch. Blank lines were
restored so that every hunk matches its @@ line counts; indentation was inferred from
Python syntax and may need adjusting against the target tree before this applies.
Changes made to the added lines during review (the index hashes below predate them):
  * the service-type prompt was titled "Sector"; it is now "Service type"
  * exit() (a `site` helper meant for the interactive shell) became `raise SystemExit`
  * get_unknown_nonempty_columns walks the schema in order instead of iterating a set,
    so the reported column order is stable between runs
  * one-line docstrings were added to the two new functions

diff --git a/tariff_fetch/_cli/rateacuity_gas_urdb.py b/tariff_fetch/_cli/rateacuity_gas_urdb.py
index fdc59d7..fcc1df7 100644
--- a/tariff_fetch/_cli/rateacuity_gas_urdb.py
+++ b/tariff_fetch/_cli/rateacuity_gas_urdb.py
@@ -4,7 +4,7 @@
 from datetime import date
 from pathlib import Path
 from statistics import mean
-from typing import cast
+from typing import cast, get_args
 
 import questionary
 import tenacity
@@ -17,7 +17,7 @@
     build_urdb,
 )
 from tariff_fetch.urdb.rateacuity_history_gas.history_data import HistoryData, PercentageRow, Row
-from tariff_fetch.urdb.schema import URDBRate
+from tariff_fetch.urdb.schema import RateSector, ServiceType, URDBRate
 
 from . import console, prompt_filename
 
@@ -99,6 +99,12 @@ def process_rateacuity_gas_urdb(output_folder: Path, state: str, year: int):
                     console.print(f" - {error.row}")
                 proceed = Confirm.ask("Proceed?", console=console)
 
+            if proceed and (unknown_non_empty_columns := hd.get_unknown_nonempty_columns()):
+                console.print("Found following unknown non-empty columns. Their values will be ignored:")
+                for col in unknown_non_empty_columns:
+                    console.print(f" - {col}")
+                proceed = Confirm.ask("Proceed?", console=console)
+
             if proceed:
                 apply_percentages = False
                 rows = list(hd.rows())
@@ -112,10 +118,48 @@ def process_rateacuity_gas_urdb(output_folder: Path, state: str, year: int):
                     console.print("Percentages will be applied to the final result as is")
                     apply_percentages = Confirm.ask("Apply percentages? (otherwise percentages will be ignored)")
 
-                urdb = build_urdb(rows, apply_percentages)
-                urdb["utility"] = selected_utility
-                urdb["name"] = tariff
-                result.append(urdb)
+                label = cast(
+                    str | None, questionary.text("Label", default=_utility_name_to_label(selected_utility)).ask()
+                )
+                if label is None:
+                    raise SystemExit
+                sector = cast(
+                    RateSector | None,
+                    questionary.select(
+                        "Sector",
+                        default="Residential",
+                        choices=get_args(RateSector),
+                    ).ask(),
+                )
+                if sector is None:
+                    raise SystemExit
+
+                servicetype = cast(
+                    ServiceType | None,
+                    questionary.select(
+                        "Service type",
+                        default="Bundled",
+                        choices=get_args(ServiceType),
+                    ).ask(),
+                )
+                if servicetype is None:
+                    raise SystemExit
+
+                try:
+                    urdb = build_urdb(rows, apply_percentages)
+                except ValueError as e:
+                    console.print(f"Cannot convert to urdb: [red]{e}[/]")
+                else:
+                    urdb["utility"] = selected_utility
+                    urdb["name"] = tariff
+                    urdb["label"] = label
+                    urdb["sector"] = sector
+                    urdb["servicetype"] = servicetype
+                    urdb["demandunits"] = "kW"
+                    urdb["mincharge"] = 0.0
+                    urdb["minchargeunits"] = "$/month"
+                    urdb["country"] = "USA"
+                    result.append(urdb)
 
             scraping_state = (
                 scraping_state.back_to_selections()
@@ -127,7 +171,15 @@ def process_rateacuity_gas_urdb(output_folder: Path, state: str, year: int):
     if not (filename := prompt_filename(output_folder, suggested_filename, "json")):
         return
     filename.parent.mkdir(exist_ok=True)
-    _ = filename.write_text(json.dumps(result, indent=2))
+    wrapped_result = {"items": result}
+    _ = filename.write_text(json.dumps(wrapped_result, indent=2))
+
+
+def _utility_name_to_label(utility_name: str) -> str:
+    """Suggest a default label: the lower-cased first character of each word in the utility name."""
+    if not utility_name:
+        return ""
+    return "".join(w[0].lower() for w in utility_name.split() if w)
 
 
 def _get_percentage_columns(rows: Collection[Row]) -> list[tuple[str, str | None, float]]:
diff --git a/tariff_fetch/urdb/rateacuity_history_gas/__init__.py b/tariff_fetch/urdb/rateacuity_history_gas/__init__.py
index 5415863..6633b85 100644
--- a/tariff_fetch/urdb/rateacuity_history_gas/__init__.py
+++ b/tariff_fetch/urdb/rateacuity_history_gas/__init__.py
@@ -5,6 +5,7 @@
 
 from tariff_fetch.urdb.schema import EnergyTier, MonthSchedule, URDBRate
 
+from .exceptions import EmptyBandsError
 from .history_data import ConsumptionRow, FixedChargeRow, PercentageRow, Row
 
 
@@ -32,6 +33,8 @@ def _build_energy_schedule_raw(rows: Collection[Row], include_taxes: bool) -> UR
     summed_bands = tuple(
         (round(limit) if limit != inf else inf, round(max(0, value), 6)) for limit, value in summed_bands
     )
+    if not summed_bands:
+        raise EmptyBandsError()
     # join bands
     summed_bands = [
         *(this for this, next_ in itertools.pairwise(summed_bands) if this[1] != next_[1] and this[0] != 0),
diff --git a/tariff_fetch/urdb/rateacuity_history_gas/exceptions.py b/tariff_fetch/urdb/rateacuity_history_gas/exceptions.py
index 29c0a02..76018ed 100644
--- a/tariff_fetch/urdb/rateacuity_history_gas/exceptions.py
+++ b/tariff_fetch/urdb/rateacuity_history_gas/exceptions.py
@@ -1,6 +1,13 @@
 from typing_extensions import final, override
 
 
+@final
+class EmptyBandsError(ValueError):
+    @override
+    def __str__(self) -> str:
+        return "Empty monthly bands"
+
+
 @final
 class IncorrectDataframeSchemaMonths(ValueError):
     @override
diff --git a/tariff_fetch/urdb/rateacuity_history_gas/history_data.py b/tariff_fetch/urdb/rateacuity_history_gas/history_data.py
index ed11c7d..ed1005b 100644
--- a/tariff_fetch/urdb/rateacuity_history_gas/history_data.py
+++ b/tariff_fetch/urdb/rateacuity_history_gas/history_data.py
@@ -41,6 +41,18 @@ def rows(self) -> Iterator["Row"]:
             with contextlib.suppress(RowValidationError):
                 yield _row_to_model(row_dict, location_avg_factor, month_column_names)
 
+    def get_unknown_nonempty_columns(self) -> list[str]:
+        """Return non-date columns that are not FixedChargeRow fields yet hold a non-null, non-empty value."""
+        df = self._df
+        known_columns = FixedChargeRow.model_fields.keys()
+        # Walk the schema rather than a set so the reported order is the same on every run.
+        unknown_columns = [c for c in df.schema if not is_date_column_name(c) and c not in known_columns]
+        return [
+            c
+            for c in unknown_columns
+            if not (df[c].is_null() | (df[c] == "" if df[c].dtype == pl.Utf8 else False)).all()
+        ]
+
     def validate_rows(self) -> list[RowValidationError]:
         result: list[RowValidationError] = []
         month_column_names = _get_month_column_names(self._df)
diff --git a/tariff_fetch/urdb/schema.py b/tariff_fetch/urdb/schema.py
index 472bea6..516de10 100644
--- a/tariff_fetch/urdb/schema.py
+++ b/tariff_fetch/urdb/schema.py
@@ -174,6 +174,8 @@ class URDBRate(TypedDict, total=False):
     minchargeunits: MinChargeUnit
     fixedattrs: Attrs
 
+    country: str
+
 
 URDBRateAdapter: TypeAdapter[URDBRate] = TypeAdapter(URDBRate)
 URDBListAdapter: TypeAdapter[list[URDBRate]] = TypeAdapter(Annotated[list[URDBRate], FailFast()])