"""Container for the result of running optimization"""
from collections import OrderedDict
from typing import Dict, Optional, Tuple, Union
import numpy as np
import pandas as pd
from cmdstanpy.cmdstan_args import Method, OptimizeArgs
from cmdstanpy.utils import BaseType, get_logger, scan_optimize_csv
from .metadata import InferenceMetadata
from .runset import RunSet
class CmdStanMLE:
    """
    Container for outputs from CmdStan optimization.
    Created by :meth:`CmdStanModel.optimize`.

    The convergence status is read from the runset once, at construction,
    and exposed as the ``converged`` attribute; every accessor that returns
    estimates logs a warning when ``converged`` is false.
    """

    def __init__(self, runset: RunSet) -> None:
        """
        Initialize object from the runset of an optimization run.

        :param runset: the RunSet to wrap; its ``method`` must be
            ``Method.OPTIMIZE``.
        :raises ValueError: if the runset was produced by another method.
        """
        if runset.method != Method.OPTIMIZE:
            raise ValueError(
                'Wrong runset method, expecting optimize runset, '
                'found method {}'.format(runset.method)
            )
        self.runset = runset
        # info from runset to be exposed
        self.converged = runset._check_retcodes()
        optimize_args = self.runset._args.method_args
        assert isinstance(
            optimize_args, OptimizeArgs
        )  # make the typechecker happy
        self._save_iterations = optimize_args.save_iterations
        self._set_mle_attrs(runset.csv_files[0])

    def __repr__(self) -> str:
        """
        Return a multi-line summary: model name and optimizer arguments,
        the CSV and stdout file paths, and a warning line when the
        optimizer did not converge.
        """
        # local is deliberately not called ``repr`` to avoid shadowing the
        # builtin of the same name
        text = 'CmdStanMLE: model={}{}'.format(
            self.runset.model,
            self.runset._args.method_args.compose(0, cmd=[]),
        )
        text = '{}\n csv_file:\n\t{}\n output_file:\n\t{}'.format(
            text,
            '\n\t'.join(self.runset.csv_files),
            '\n\t'.join(self.runset.stdout_files),
        )
        if not self.converged:
            # NOTE: message text is kept exactly as it was, including the
            # doubled space produced by joining the two fragments.
            text = '{}\n Warning: invalid estimate, '.format(text)
            text = '{} optimization failed to converge.'.format(text)
        return text

    def _set_mle_attrs(self, sample_csv_0: str) -> None:
        """
        Scan the optimizer output CSV file and cache its contents.

        Stores the inference metadata, the column names, the final
        estimate and, when iterations were saved, all iterations.

        :param sample_csv_0: path of the CSV file to scan.
        """
        meta = scan_optimize_csv(sample_csv_0, self._save_iterations)
        self._metadata = InferenceMetadata(meta)
        self._column_names: Tuple[str, ...] = meta['column_names']
        self._mle: np.ndarray = meta['mle']
        if self._save_iterations:
            self._all_iters: np.ndarray = meta['all_iters']

    def _warn_if_not_converged(self) -> None:
        """Log a warning when the optimizer failed to converge."""
        if not self.converged:
            get_logger().warning(
                'Invalid estimate, optimization failed to converge.'
            )

    def _warn_iterations_not_saved(self) -> None:
        """Log a warning that intermediate iterations are unavailable."""
        get_logger().warning(
            'Intermediate iterations not saved to CSV output file. '
            'Rerun the optimize method with "save_iterations=True".'
        )

    @property
    def column_names(self) -> Tuple[str, ...]:
        """
        Names of estimated quantities, includes joint log probability,
        and all parameters, transformed parameters, and generated quantities.
        """
        return self._column_names

    @property
    def metadata(self) -> InferenceMetadata:
        """
        Returns object which contains CmdStan configuration as well as
        information about the names and structure of the inference method
        and model output variables.
        """
        return self._metadata

    @property
    def optimized_params_np(self) -> np.ndarray:
        """
        Returns all final estimates from the optimizer as a numpy.ndarray
        which contains all optimizer outputs, i.e., the value for `lp__`
        as well as all Stan program variables.
        """
        self._warn_if_not_converged()
        return self._mle

    @property
    def optimized_iterations_np(self) -> Optional[np.ndarray]:
        """
        Returns all saved iterations from the optimizer and final estimate
        as a numpy.ndarray which contains all optimizer outputs, i.e.,
        the value for `lp__` as well as all Stan program variables.

        Returns ``None`` (after logging a warning) when the optimizer was
        run without ``save_iterations=True``.
        """
        if not self._save_iterations:
            self._warn_iterations_not_saved()
            return None
        self._warn_if_not_converged()
        return self._all_iters

    @property
    def optimized_params_pd(self) -> pd.DataFrame:
        """
        Returns all final estimates from the optimizer as a pandas.DataFrame
        which contains all optimizer outputs, i.e., the value for `lp__`
        as well as all Stan program variables.
        """
        self._warn_if_not_converged()
        return pd.DataFrame([self._mle], columns=self.column_names)

    @property
    def optimized_iterations_pd(self) -> Optional[pd.DataFrame]:
        """
        Returns all saved iterations from the optimizer and final estimate
        as a pandas.DataFrame which contains all optimizer outputs, i.e.,
        the value for `lp__` as well as all Stan program variables.

        Returns ``None`` (after logging a warning) when the optimizer was
        run without ``save_iterations=True``.
        """
        if not self._save_iterations:
            self._warn_iterations_not_saved()
            return None
        self._warn_if_not_converged()
        return pd.DataFrame(self._all_iters, columns=self.column_names)

    @property
    def optimized_params_dict(self) -> Dict[str, float]:
        """
        Returns all estimates from the optimizer, including `lp__` as a
        Python Dict.  Only returns estimate from final iteration.
        """
        self._warn_if_not_converged()
        return OrderedDict(zip(self.column_names, self._mle))

    def stan_variable(
        self,
        var: Optional[str] = None,
        *,
        inc_iterations: bool = False,
        warn: bool = True,
    ) -> Union[np.ndarray, float]:
        """
        Return a numpy.ndarray which contains the estimates for the
        named Stan program variable where the dimensions of the
        numpy.ndarray match the shape of the Stan program variable.

        :param var: variable name

        :param inc_iterations: When ``True`` and the intermediate estimates
            are included in the output, i.e., the optimizer was run with
            ``save_iterations=True``, then intermediate estimates are
            included.  Default value is ``False``.

        :param warn: When ``True`` (the default), log a warning if
            intermediate iterations were requested but not saved, and if
            the optimizer failed to converge.

        :return: for a variable stored in more than one column, an array;
            for a single-column variable, a ``float`` holding the final
            estimate, or a 1-d array with one entry per row when more
            than one saved iteration is returned.

        :raises ValueError: if ``var`` is ``None`` or is not the name of a
            Stan program variable.

        See Also
        --------
        CmdStanMLE.stan_variables
        CmdStanMCMC.stan_variable
        CmdStanVB.stan_variable
        CmdStanGQ.stan_variable
        """
        if var is None:
            raise ValueError('no variable name specified.')
        if var not in self._metadata.stan_vars_dims:
            raise ValueError('unknown variable name: {}'.format(var))
        if warn:
            if inc_iterations and not self._save_iterations:
                self._warn_iterations_not_saved()
            self._warn_if_not_converged()

        col_idxs = list(self._metadata.stan_vars_cols[var])
        # intermediate rows are only available when they were both
        # requested and saved; otherwise fall back to the final estimate
        if inc_iterations and self._save_iterations:
            num_rows = self._all_iters.shape[0]
        else:
            num_rows = 1

        if len(col_idxs) > 1:  # container var
            dims = (num_rows,) + self._metadata.stan_vars_dims[var]
            # pylint: disable=redundant-keyword-arg
            if num_rows > 1:
                result = self._all_iters[:, col_idxs].reshape(dims, order='F')
            else:
                # single row: drop the leading row dimension
                result = self._mle[col_idxs].reshape(dims[1:], order="F")
            if self._metadata.stan_vars_types[var] == BaseType.COMPLEX:
                # index 0 of the last axis is the real part, index 1 the
                # imaginary part
                result = result[..., 0] + 1j * result[..., 1]
            return result

        # scalar var
        col_idx = col_idxs[0]
        if num_rows > 1:
            return self._all_iters[:, col_idx]
        return float(self._mle[col_idx])

    def stan_variables(
        self, inc_iterations: bool = False
    ) -> Dict[str, Union[np.ndarray, float]]:
        """
        Return a dictionary mapping Stan program variables names
        to the corresponding numpy.ndarray containing the inferred values.

        :param inc_iterations: When ``True`` and the intermediate estimates
            are included in the output, i.e., the optimizer was run with
            ``save_iterations=True``, then intermediate estimates are
            included.  Default value is ``False``.

        See Also
        --------
        CmdStanMLE.stan_variable
        CmdStanMCMC.stan_variables
        CmdStanVB.stan_variables
        CmdStanGQ.stan_variables
        """
        # warn once here; per-variable warnings are suppressed below
        self._warn_if_not_converged()
        return {
            name: self.stan_variable(
                name, inc_iterations=inc_iterations, warn=False
            )
            for name in self._metadata.stan_vars_dims
        }

    def save_csvfiles(self, dir: Optional[str] = None) -> None:
        """
        Move output CSV files to specified directory.  If files were
        written to the temporary session directory, clean filename.
        E.g., save 'bernoulli-201912081451-1-5nm6as7u.csv' as
        'bernoulli-201912081451-1.csv'.

        :param dir: directory path

        See Also
        --------
        stanfit.RunSet.save_csvfiles
        cmdstanpy.from_csv
        """
        self.runset.save_csvfiles(dir)