Module buildstock_query.tools.characteristics_visualizer.figure
Expand source code
from buildstock_query.tools.upgrades_visualizer.plot_utils import PlotParams, ValueTypes
from buildstock_query.tools.upgrades_visualizer.viz_data import VizData
import plotly.graph_objects as go
import polars as pl
import re
class UpgradesPlot:
    """Build plotly figures and matching report tables from a ``VizData`` source.

    ``get_plot`` is the public entry point; the underscore-prefixed methods
    assemble the traces and layout for bar (mean/total/count), box
    (distribution) and scatter figures.
    """

    def __init__(self, viz_data: VizData) -> None:
        """Keep a reference to the data provider queried by ``get_plot``."""
        self.viz_data = viz_data

    def get_ylabel(self, end_use):
        """Return the y-axis label for the selected end-use column names.

        A single column name is returned unchanged. For several columns the
        label is ``"<count>_fuels_<name>"``, where ``<name>`` is the first
        column name with its ``end_use_`` / ``fuel_use_`` prefix removed and
        its first remaining underscore-separated token dropped.
        """
        if len(end_use) == 1:
            return end_use[0]
        pure_end_use_name = end_use[0].removeprefix("end_use_")
        pure_end_use_name = pure_end_use_name.removeprefix("fuel_use_")
        pure_end_use_name = "_".join(pure_end_use_name.split("_")[1:])
        return f"{len(end_use)}_fuels_{pure_end_use_name}"

    def explode_str(self, input_str):
        """Break a label into a tuple of text and integer pieces.

        The value is stringified and lower-cased; a full English month name is
        replaced by its month number. The text is then split on ``<``, ``-``
        and runs of digits. Digit runs become ``int`` and the ``None``
        placeholders ``re.split`` emits for the capture group that did not
        match become ``"X"``.

        Example: ``"<10"`` gives ``('', '<', 'X', '', 'X', 10, '')``.
        """
        input_str = str(input_str).lower()
        month2num = {"january": 1, "february": 2, "march": 3, "april": 4,
                     "may": 5, "june": 6, "july": 7, "august": 8,
                     "september": 9, "october": 10, "november": 11, "december": 12}
        input_str = str(month2num[input_str] if input_str in month2num else input_str)
        input_str = [
            int(x) if x and x[0] in "0123456789" else x
            for x in re.split(r"([\<\-])|([0-9]+)", input_str)
        ]
        return tuple("X" if x is None else x for x in input_str)

    def get_plot(self, params: PlotParams):
        """Build the figure and report table described by ``params``.

        A single upgrade is plotted when two or more group-by columns are
        given, when an upgrade is explicitly selected, or when a distribution
        or scatter plot has at least one group-by column. Otherwise all
        upgrades are plotted with ``'upgrade'`` prepended to the group-by list.

        Note that ``params.upgrade`` and ``params.group_by`` are updated in place.

        Returns:
            The ``(figure, report_dataframe)`` pair produced by ``_get_plot``.
        """
        if len(params.group_by) >= 2 or params.upgrade is not None or \
                (params.value_type in [ValueTypes.distribution, ValueTypes.scatter] and len(params.group_by) >= 1):
            params.upgrade = params.upgrade if params.upgrade else 0
            params.group_by = ['upgrade'] if not params.group_by else params.group_by
            plot_df = self.viz_data.get_plotting_df(upgrade=params.upgrade, params=params)
        else:
            params.group_by = ['upgrade'] + params.group_by
            plot_df = self.viz_data.get_plotting_df_all_upgrades(params=params)
        return self._get_plot(plot_df, params)

    def _get_plot(self, df, params: PlotParams):
        """Turn a plotting dataframe into a figure plus a tabular report.

        ``df`` is grouped by ``params.group_by[0]`` (one trace per group) and,
        when more group-by columns are present, sub-grouped by the remaining
        columns. The code reads the ``value`` and ``building_id`` columns, and
        ``baseline_value`` for scatter plots.

        Returns:
            ``(fig, report_df)`` where ``report_df`` is the concatenation of
            the per-trace tables that could be built.
        """
        fig = go.Figure()
        counter = 0  # number of (group, sub-group) combinations processed so far
        report_dfs = [pl.DataFrame()]
        multi_group = len(params.group_by) > 1
        if params.value_type in [ValueTypes.mean, ValueTypes.total, ValueTypes.count]:
            xtitle = ", ".join(params.group_by[1:]) if multi_group else params.group_by[0]
            ytitle = f"{self.get_ylabel(params.enduses)}_{params.value_type.value}_{params.savings_type.value}"
        elif params.value_type in [ValueTypes.distribution]:
            xtitle = ", ".join(params.group_by[1:]) if multi_group else params.group_by[0]
            ytitle = f"{self.get_ylabel(params.enduses)}_{params.savings_type.value}"
        else:
            assert params.value_type in [ValueTypes.scatter]
            xtitle = "baseline_value"
            ytitle = f"{self.get_ylabel(params.enduses)}_{params.savings_type.value}"
        for grp0, sub_df in df.groupby(params.group_by[0], maintain_order=True):
            upgrade = int(grp0) if params.group_by[0] == 'upgrade' else params.upgrade
            upgrade = upgrade or 0
            yvals = []
            xvals = []
            second_groups = []
            sample_counts = []
            upgrades = []
            hovervals = []
            if multi_group:
                second_plots = list(sub_df.groupby(params.group_by[1:], maintain_order=True))
            else:
                second_plots = [(tuple(), sub_df)]
            for second_name, second_df in second_plots:
                # Group keys are not guaranteed to be strings, and a scalar key
                # must not be joined character by character.
                if isinstance(second_name, (tuple, list)):
                    name = ','.join(map(str, second_name)) if second_name else str(grp0)
                else:
                    name = str(second_name)
                count = len(second_df)
                mean = pl.mean(second_df['value'])
                if counter >= 500:
                    # Cap the amount of plotted groups; leave a visible marker instead.
                    yvals.append(0.1)
                    xvals.append("Too many groups")
                    sample_counts.append(0)
                    upgrades.append(upgrade)
                    hovervals.append("Too many groups")
                    grp0 = "Too many groups"
                    break
                if params.value_type in [ValueTypes.total, ValueTypes.mean, ValueTypes.count]:
                    if params.value_type == ValueTypes.total:
                        val = pl.sum(second_df['value'])
                    elif params.value_type == ValueTypes.count:
                        val = second_df['building_id'].n_unique()
                    else:
                        val = pl.mean(second_df['value'])
                    val = float(val)
                    yvals.append(val)
                    xvals.append(name)
                    sample_counts.append(count)
                    upgrades.append(upgrade)
                    # All three pieces belong to one string; previously the
                    # "Units Count" piece was a stray statement and got lost.
                    hovertext = (
                        f"{self.viz_data.upgrade2name.get(upgrade)}<br>{grp0}<br>{name}<br>Average {mean}."
                        f"<br>Sample Count: {count}."
                        f"<br>Units Count: {count * self.viz_data.sample_weight}."
                    )
                    hovervals.append(hovertext)
                    second_groups.append(name)
                elif params.value_type in [ValueTypes.distribution, ValueTypes.scatter]:
                    # One hover string per building (row) of the sub-group.
                    hovertext = [f'{self.viz_data.upgrade2name.get(upgrade)}<br>{grp0}<br>{name}<br>Building:'
                                 f'{bid}<br>Sample Count: {count}'
                                 for bid in second_df['building_id'].to_list()]
                    if params.value_type == ValueTypes.distribution:
                        # Box plots take flat, row-aligned lists.
                        xvals.extend([name] * count)
                        yvals.extend(second_df['value'].to_list())
                        second_groups.extend([name] * count)
                        sample_counts.extend([count] * count)
                        upgrades.extend([upgrade] * count)
                        hovervals.extend(hovertext)
                    else:
                        # Scatter plots keep one nested list per sub-group.
                        xvals.append(second_df['baseline_value'].to_list())
                        yvals.append(second_df['value'].to_list())
                        second_groups.append(name)
                        sample_counts.append([count] * count)
                        upgrades.append([upgrade] * count)
                        hovervals.append(hovertext)
                counter += 1
            self._add_plot(params, fig, grp0, yvals, xvals, second_groups, hovervals, multi_group)
            try:
                sub_df = pl.DataFrame({xtitle: xvals, ytitle: yvals, 'upgrade': [f'Upgrade {grp0}'] * len(xvals),
                                       'sample_count': sample_counts, 'info': hovervals})
                report_dfs.append(sub_df)
            except Exception:
                # Best effort: the trace is already on the figure, so a group
                # whose report table cannot be built is simply left out.
                continue
        if params.change_type:
            title = f"{params.value_type} - {params.savings_type} for {params.change_type} buildings"
        else:
            title = f'{params.value_type} - {params.savings_type} value'
        if params.group_by[0] != "upgrade":
            title = f"Upgrade {params.upgrade} - {title}"
        self._update_layout(params, fig, xtitle, ytitle, title, multi_group)
        return fig, pl.concat(report_dfs)

    def _update_layout(self, params: PlotParams, fig, xtitle, ytitle, title, multi_group=False):
        """Apply axis titles, legend title and plot title for the chosen plot type."""
        if params.value_type in [ValueTypes.mean, ValueTypes.total, ValueTypes.count]:
            fig.update_layout(yaxis_title=ytitle,
                              barmode='group',
                              xaxis_title=xtitle,
                              legend={"title": params.group_by[0]},
                              title=title)
        elif params.value_type in [ValueTypes.distribution]:
            fig.update_layout(yaxis_title=ytitle,
                              boxmode="group" if multi_group else "overlay",
                              xaxis_title=xtitle,
                              title=title,
                              legend={"title": params.group_by[0]},
                              clickmode='event+select')
        elif params.value_type == ValueTypes.scatter:
            fig.update_layout(yaxis_title=ytitle,
                              title=title,
                              boxmode="group" if multi_group else "overlay",
                              legend={"title": params.group_by[0]},
                              clickmode='event+select')
            # No xaxis_title is set for scatter plots; the x label is drawn as
            # a paper-anchored annotation centred below the plotting area.
            fig.add_annotation(
                dict(
                    x=0.5,
                    y=-0.20,
                    showarrow=False,
                    text=xtitle,
                    xref="paper",
                    yref="paper",
                    xanchor="center",
                    yanchor="top",
                    font=dict(size=14)
                )
            )

    def _add_plot(self, params: PlotParams, fig, grp0, yvals, xvals, second_groups, hovervals, multi_group=False):
        """Add the trace(s) for one first-level group ``grp0`` to ``fig``.

        Bar and box traces take the flat ``xvals`` / ``yvals`` lists. Scatter
        traces take one nested list per sub-group: with ``multi_group`` each
        sub-group gets its own side-by-side axes, otherwise only the first
        (and only) entry is drawn.
        """
        if params.value_type in [ValueTypes.mean, ValueTypes.total, ValueTypes.count]:
            fig.add_trace(go.Bar(
                y=yvals,
                x=xvals,
                hovertext=hovervals,
                name=f'{grp0}',
                hoverinfo="all"
            )).update_traces(
                marker={"line": {"width": 0.5, "color": "rgb(0,0,0)"}}
            )
        elif params.value_type == ValueTypes.distribution:
            fig.add_trace(go.Box(
                y=yvals,
                x=xvals,
                name=f'{grp0}',
                boxpoints='suspectedoutliers',
                boxmean=True,  # represent mean
                hovertext=hovervals,
                hoverinfo="all"
            ))
        elif params.value_type == ValueTypes.scatter:
            if multi_group:
                self._add_multi_scatter(fig, grp0, list(zip(second_groups, xvals, yvals, hovervals)))
            else:
                fig.add_trace(go.Scatter(
                    y=yvals[0],
                    x=xvals[0],
                    name=f'{grp0}',
                    mode='markers',
                    hovertext=hovervals[0],
                    hoverinfo="all"))

    def _add_multi_scatter(self, fig, grp0, plot_tuples):
        """Draw one scatter trace per sub-group on side-by-side axes.

        ``plot_tuples`` holds ``(name, xvals, yvals, hovertexts)`` per
        sub-group. Nothing is drawn when it is empty.
        """
        num_plots = len(plot_tuples)
        if num_plots == 0:
            # Avoids a division by zero below when no sub-group was collected.
            return
        # Width of each subplot given the total plot count and the gap between them.
        gap_fraction = 0.05  # for a 5% gap
        subplot_width = (1.0 - (num_plots - 1) * gap_fraction) / num_plots
        for index, (name, xvals, yvals, hovertexts) in enumerate(plot_tuples):
            domain_start = index * (subplot_width + gap_fraction)
            domain_end = domain_start + subplot_width
            # plotly names the first axis pair 'x'/'y' and later ones 'x2'/'y2', ...
            axis_suffix = "" if index == 0 else str(index + 1)
            scatter_xaxis_key = f'x{axis_suffix}'
            scatter_yaxis_key = f'y{axis_suffix}'
            layout_xaxis_key = f'xaxis{axis_suffix}'
            layout_yaxis_key = f'yaxis{axis_suffix}'
            fig.add_trace(go.Scatter(x=xvals,
                                     y=yvals,
                                     # Full per-point list; indexing [0] here
                                     # repeated the first building's text on
                                     # every point.
                                     hovertext=hovertexts,
                                     name=grp0,
                                     legendgroup=grp0,
                                     mode='markers',
                                     showlegend=index == 0,
                                     xaxis=scatter_xaxis_key,
                                     yaxis=scatter_yaxis_key,
                                     hoverinfo="all"))
            fig.update_layout({
                layout_xaxis_key: {
                    'domain': [domain_start, domain_end],
                    'anchor': scatter_yaxis_key,
                    'title': name
                },
                layout_yaxis_key: {
                    'anchor': scatter_xaxis_key
                }
            })
Classes
class UpgradesPlot (viz_data: VizData)
-
Expand source code
class UpgradesPlot:
    """Build plotly figures and matching report tables from a ``VizData`` source.

    ``get_plot`` is the public entry point; the underscore-prefixed methods
    assemble the traces and layout for bar (mean/total/count), box
    (distribution) and scatter figures.
    """

    def __init__(self, viz_data: VizData) -> None:
        """Keep a reference to the data provider queried by ``get_plot``."""
        self.viz_data = viz_data

    def get_ylabel(self, end_use):
        """Return the y-axis label for the selected end-use column names.

        A single column name is returned unchanged. For several columns the
        label is ``"<count>_fuels_<name>"``, where ``<name>`` is the first
        column name with its ``end_use_`` / ``fuel_use_`` prefix removed and
        its first remaining underscore-separated token dropped.
        """
        if len(end_use) == 1:
            return end_use[0]
        pure_end_use_name = end_use[0].removeprefix("end_use_")
        pure_end_use_name = pure_end_use_name.removeprefix("fuel_use_")
        pure_end_use_name = "_".join(pure_end_use_name.split("_")[1:])
        return f"{len(end_use)}_fuels_{pure_end_use_name}"

    def explode_str(self, input_str):
        """Break a label into a tuple of text and integer pieces.

        The value is stringified and lower-cased; a full English month name is
        replaced by its month number. The text is then split on ``<``, ``-``
        and runs of digits. Digit runs become ``int`` and the ``None``
        placeholders ``re.split`` emits for the capture group that did not
        match become ``"X"``.

        Example: ``"<10"`` gives ``('', '<', 'X', '', 'X', 10, '')``.
        """
        input_str = str(input_str).lower()
        month2num = {"january": 1, "february": 2, "march": 3, "april": 4,
                     "may": 5, "june": 6, "july": 7, "august": 8,
                     "september": 9, "october": 10, "november": 11, "december": 12}
        input_str = str(month2num[input_str] if input_str in month2num else input_str)
        input_str = [
            int(x) if x and x[0] in "0123456789" else x
            for x in re.split(r"([\<\-])|([0-9]+)", input_str)
        ]
        return tuple("X" if x is None else x for x in input_str)

    def get_plot(self, params: PlotParams):
        """Build the figure and report table described by ``params``.

        A single upgrade is plotted when two or more group-by columns are
        given, when an upgrade is explicitly selected, or when a distribution
        or scatter plot has at least one group-by column. Otherwise all
        upgrades are plotted with ``'upgrade'`` prepended to the group-by list.

        Note that ``params.upgrade`` and ``params.group_by`` are updated in place.

        Returns:
            The ``(figure, report_dataframe)`` pair produced by ``_get_plot``.
        """
        if len(params.group_by) >= 2 or params.upgrade is not None or \
                (params.value_type in [ValueTypes.distribution, ValueTypes.scatter] and len(params.group_by) >= 1):
            params.upgrade = params.upgrade if params.upgrade else 0
            params.group_by = ['upgrade'] if not params.group_by else params.group_by
            plot_df = self.viz_data.get_plotting_df(upgrade=params.upgrade, params=params)
        else:
            params.group_by = ['upgrade'] + params.group_by
            plot_df = self.viz_data.get_plotting_df_all_upgrades(params=params)
        return self._get_plot(plot_df, params)

    def _get_plot(self, df, params: PlotParams):
        """Turn a plotting dataframe into a figure plus a tabular report.

        ``df`` is grouped by ``params.group_by[0]`` (one trace per group) and,
        when more group-by columns are present, sub-grouped by the remaining
        columns. The code reads the ``value`` and ``building_id`` columns, and
        ``baseline_value`` for scatter plots.

        Returns:
            ``(fig, report_df)`` where ``report_df`` is the concatenation of
            the per-trace tables that could be built.
        """
        fig = go.Figure()
        counter = 0  # number of (group, sub-group) combinations processed so far
        report_dfs = [pl.DataFrame()]
        multi_group = len(params.group_by) > 1
        if params.value_type in [ValueTypes.mean, ValueTypes.total, ValueTypes.count]:
            xtitle = ", ".join(params.group_by[1:]) if multi_group else params.group_by[0]
            ytitle = f"{self.get_ylabel(params.enduses)}_{params.value_type.value}_{params.savings_type.value}"
        elif params.value_type in [ValueTypes.distribution]:
            xtitle = ", ".join(params.group_by[1:]) if multi_group else params.group_by[0]
            ytitle = f"{self.get_ylabel(params.enduses)}_{params.savings_type.value}"
        else:
            assert params.value_type in [ValueTypes.scatter]
            xtitle = "baseline_value"
            ytitle = f"{self.get_ylabel(params.enduses)}_{params.savings_type.value}"
        for grp0, sub_df in df.groupby(params.group_by[0], maintain_order=True):
            upgrade = int(grp0) if params.group_by[0] == 'upgrade' else params.upgrade
            upgrade = upgrade or 0
            yvals = []
            xvals = []
            second_groups = []
            sample_counts = []
            upgrades = []
            hovervals = []
            if multi_group:
                second_plots = list(sub_df.groupby(params.group_by[1:], maintain_order=True))
            else:
                second_plots = [(tuple(), sub_df)]
            for second_name, second_df in second_plots:
                # Group keys are not guaranteed to be strings, and a scalar key
                # must not be joined character by character.
                if isinstance(second_name, (tuple, list)):
                    name = ','.join(map(str, second_name)) if second_name else str(grp0)
                else:
                    name = str(second_name)
                count = len(second_df)
                mean = pl.mean(second_df['value'])
                if counter >= 500:
                    # Cap the amount of plotted groups; leave a visible marker instead.
                    yvals.append(0.1)
                    xvals.append("Too many groups")
                    sample_counts.append(0)
                    upgrades.append(upgrade)
                    hovervals.append("Too many groups")
                    grp0 = "Too many groups"
                    break
                if params.value_type in [ValueTypes.total, ValueTypes.mean, ValueTypes.count]:
                    if params.value_type == ValueTypes.total:
                        val = pl.sum(second_df['value'])
                    elif params.value_type == ValueTypes.count:
                        val = second_df['building_id'].n_unique()
                    else:
                        val = pl.mean(second_df['value'])
                    val = float(val)
                    yvals.append(val)
                    xvals.append(name)
                    sample_counts.append(count)
                    upgrades.append(upgrade)
                    # All three pieces belong to one string; previously the
                    # "Units Count" piece was a stray statement and got lost.
                    hovertext = (
                        f"{self.viz_data.upgrade2name.get(upgrade)}<br>{grp0}<br>{name}<br>Average {mean}."
                        f"<br>Sample Count: {count}."
                        f"<br>Units Count: {count * self.viz_data.sample_weight}."
                    )
                    hovervals.append(hovertext)
                    second_groups.append(name)
                elif params.value_type in [ValueTypes.distribution, ValueTypes.scatter]:
                    # One hover string per building (row) of the sub-group.
                    hovertext = [f'{self.viz_data.upgrade2name.get(upgrade)}<br>{grp0}<br>{name}<br>Building:'
                                 f'{bid}<br>Sample Count: {count}'
                                 for bid in second_df['building_id'].to_list()]
                    if params.value_type == ValueTypes.distribution:
                        # Box plots take flat, row-aligned lists.
                        xvals.extend([name] * count)
                        yvals.extend(second_df['value'].to_list())
                        second_groups.extend([name] * count)
                        sample_counts.extend([count] * count)
                        upgrades.extend([upgrade] * count)
                        hovervals.extend(hovertext)
                    else:
                        # Scatter plots keep one nested list per sub-group.
                        xvals.append(second_df['baseline_value'].to_list())
                        yvals.append(second_df['value'].to_list())
                        second_groups.append(name)
                        sample_counts.append([count] * count)
                        upgrades.append([upgrade] * count)
                        hovervals.append(hovertext)
                counter += 1
            self._add_plot(params, fig, grp0, yvals, xvals, second_groups, hovervals, multi_group)
            try:
                sub_df = pl.DataFrame({xtitle: xvals, ytitle: yvals, 'upgrade': [f'Upgrade {grp0}'] * len(xvals),
                                       'sample_count': sample_counts, 'info': hovervals})
                report_dfs.append(sub_df)
            except Exception:
                # Best effort: the trace is already on the figure, so a group
                # whose report table cannot be built is simply left out.
                continue
        if params.change_type:
            title = f"{params.value_type} - {params.savings_type} for {params.change_type} buildings"
        else:
            title = f'{params.value_type} - {params.savings_type} value'
        if params.group_by[0] != "upgrade":
            title = f"Upgrade {params.upgrade} - {title}"
        self._update_layout(params, fig, xtitle, ytitle, title, multi_group)
        return fig, pl.concat(report_dfs)

    def _update_layout(self, params: PlotParams, fig, xtitle, ytitle, title, multi_group=False):
        """Apply axis titles, legend title and plot title for the chosen plot type."""
        if params.value_type in [ValueTypes.mean, ValueTypes.total, ValueTypes.count]:
            fig.update_layout(yaxis_title=ytitle,
                              barmode='group',
                              xaxis_title=xtitle,
                              legend={"title": params.group_by[0]},
                              title=title)
        elif params.value_type in [ValueTypes.distribution]:
            fig.update_layout(yaxis_title=ytitle,
                              boxmode="group" if multi_group else "overlay",
                              xaxis_title=xtitle,
                              title=title,
                              legend={"title": params.group_by[0]},
                              clickmode='event+select')
        elif params.value_type == ValueTypes.scatter:
            fig.update_layout(yaxis_title=ytitle,
                              title=title,
                              boxmode="group" if multi_group else "overlay",
                              legend={"title": params.group_by[0]},
                              clickmode='event+select')
            # No xaxis_title is set for scatter plots; the x label is drawn as
            # a paper-anchored annotation centred below the plotting area.
            fig.add_annotation(
                dict(
                    x=0.5,
                    y=-0.20,
                    showarrow=False,
                    text=xtitle,
                    xref="paper",
                    yref="paper",
                    xanchor="center",
                    yanchor="top",
                    font=dict(size=14)
                )
            )

    def _add_plot(self, params: PlotParams, fig, grp0, yvals, xvals, second_groups, hovervals, multi_group=False):
        """Add the trace(s) for one first-level group ``grp0`` to ``fig``.

        Bar and box traces take the flat ``xvals`` / ``yvals`` lists. Scatter
        traces take one nested list per sub-group: with ``multi_group`` each
        sub-group gets its own side-by-side axes, otherwise only the first
        (and only) entry is drawn.
        """
        if params.value_type in [ValueTypes.mean, ValueTypes.total, ValueTypes.count]:
            fig.add_trace(go.Bar(
                y=yvals,
                x=xvals,
                hovertext=hovervals,
                name=f'{grp0}',
                hoverinfo="all"
            )).update_traces(
                marker={"line": {"width": 0.5, "color": "rgb(0,0,0)"}}
            )
        elif params.value_type == ValueTypes.distribution:
            fig.add_trace(go.Box(
                y=yvals,
                x=xvals,
                name=f'{grp0}',
                boxpoints='suspectedoutliers',
                boxmean=True,  # represent mean
                hovertext=hovervals,
                hoverinfo="all"
            ))
        elif params.value_type == ValueTypes.scatter:
            if multi_group:
                self._add_multi_scatter(fig, grp0, list(zip(second_groups, xvals, yvals, hovervals)))
            else:
                fig.add_trace(go.Scatter(
                    y=yvals[0],
                    x=xvals[0],
                    name=f'{grp0}',
                    mode='markers',
                    hovertext=hovervals[0],
                    hoverinfo="all"))

    def _add_multi_scatter(self, fig, grp0, plot_tuples):
        """Draw one scatter trace per sub-group on side-by-side axes.

        ``plot_tuples`` holds ``(name, xvals, yvals, hovertexts)`` per
        sub-group. Nothing is drawn when it is empty.
        """
        num_plots = len(plot_tuples)
        if num_plots == 0:
            # Avoids a division by zero below when no sub-group was collected.
            return
        # Width of each subplot given the total plot count and the gap between them.
        gap_fraction = 0.05  # for a 5% gap
        subplot_width = (1.0 - (num_plots - 1) * gap_fraction) / num_plots
        for index, (name, xvals, yvals, hovertexts) in enumerate(plot_tuples):
            domain_start = index * (subplot_width + gap_fraction)
            domain_end = domain_start + subplot_width
            # plotly names the first axis pair 'x'/'y' and later ones 'x2'/'y2', ...
            axis_suffix = "" if index == 0 else str(index + 1)
            scatter_xaxis_key = f'x{axis_suffix}'
            scatter_yaxis_key = f'y{axis_suffix}'
            layout_xaxis_key = f'xaxis{axis_suffix}'
            layout_yaxis_key = f'yaxis{axis_suffix}'
            fig.add_trace(go.Scatter(x=xvals,
                                     y=yvals,
                                     # Full per-point list; indexing [0] here
                                     # repeated the first building's text on
                                     # every point.
                                     hovertext=hovertexts,
                                     name=grp0,
                                     legendgroup=grp0,
                                     mode='markers',
                                     showlegend=index == 0,
                                     xaxis=scatter_xaxis_key,
                                     yaxis=scatter_yaxis_key,
                                     hoverinfo="all"))
            fig.update_layout({
                layout_xaxis_key: {
                    'domain': [domain_start, domain_end],
                    'anchor': scatter_yaxis_key,
                    'title': name
                },
                layout_yaxis_key: {
                    'anchor': scatter_xaxis_key
                }
            })
Methods
def explode_str(self, input_str)
-
Expand source code
def explode_str(self, input_str):
    """Break a label into a tuple of text and integer pieces.

    The value is stringified and lower-cased; a full English month name is
    swapped for its month number. The result is split on ``<``, ``-`` and
    digit runs: digit runs are returned as ``int``, other text is kept as is,
    and the ``None`` entries ``re.split`` produces for the capture group that
    did not take part in a match are reported as ``"X"``.
    """
    month2num = {"january": 1, "february": 2, "march": 3, "april": 4,
                 "may": 5, "june": 6, "july": 7, "august": 8,
                 "september": 9, "october": 10, "november": 11, "december": 12}
    lowered = str(input_str).lower()
    text = str(month2num.get(lowered, lowered))

    pieces = []
    for token in re.split(r"([\<\-])|([0-9]+)", text):
        if token is None:
            # Placeholder for the capture group that did not match.
            pieces.append("X")
        elif token and token[0] in "0123456789":
            pieces.append(int(token))
        else:
            pieces.append(token)
    return tuple(pieces)
def get_plot(self, params: PlotParams)
-
Expand source code
def get_plot(self, params: PlotParams):
    """Build the figure and report table described by ``params``.

    One upgrade is plotted when there are two or more group-by columns, when
    an upgrade is explicitly selected, or when a distribution/scatter plot has
    at least one group-by column. In every other case all upgrades are plotted
    and ``'upgrade'`` is prepended to the group-by list.

    ``params.upgrade`` and ``params.group_by`` are updated in place. The return
    value is whatever ``self._get_plot`` returns.
    """
    group_count = len(params.group_by)
    single_upgrade = (
        group_count >= 2
        or params.upgrade is not None
        or (group_count >= 1
            and params.value_type in [ValueTypes.distribution, ValueTypes.scatter])
    )

    if not single_upgrade:
        params.group_by = ['upgrade'] + params.group_by
        all_upgrades_df = self.viz_data.get_plotting_df_all_upgrades(params=params)
        return self._get_plot(all_upgrades_df, params)

    # Fall back to upgrade 0 and to grouping by upgrade when left unset.
    params.upgrade = params.upgrade or 0
    params.group_by = params.group_by or ['upgrade']
    one_upgrade_df = self.viz_data.get_plotting_df(upgrade=params.upgrade, params=params)
    return self._get_plot(one_upgrade_df, params)
def get_ylabel(self, end_use)
-
Expand source code
def get_ylabel(self, end_use):
    """Return the y-axis label for the selected end-use column names.

    A lone column name is returned as is. With several columns the label is
    ``"<count>_fuels_<name>"``; ``<name>`` is taken from the first column by
    removing an ``end_use_`` / ``fuel_use_`` prefix and then everything up to
    and including the next underscore.
    """
    if len(end_use) == 1:
        return end_use[0]
    stem = end_use[0].removeprefix("end_use_").removeprefix("fuel_use_")
    # Keep what follows the first underscore (empty when there is none).
    _, _, remainder = stem.partition("_")
    return f"{len(end_use)}_fuels_{remainder}"