Source code for portfolio_optimization.components.io

"""This module implements I/O for portfolio data through the ``PortfolioData`` class."""
from __future__ import annotations

from pathlib import Path
from typing import TypeVar, cast

import numpy as np
import pandas as pd
from numpy.typing import NDArray
from pandas import DataFrame

# Column names that a portfolio dataset must contain (after any renaming).
# ``PortfolioData.__init__`` raises a ``ValueError`` when one of them is missing.
DEFAULT_REQUIRED_COLUMN_NAMES = [
    "asset",
    "outstanding_now",
    "min_outstanding_future",
    "max_outstanding_future",
    "income_now",
    "regcap_now",
]

# Type variable bound to ``PortfolioData``; used by ``PortfolioData.from_file`` so
# that the classmethod is typed as returning an instance of the class it is called on.
PortfolioDataT = TypeVar("PortfolioDataT", bound="PortfolioData")


class PortfolioData:
    """The ``PortfolioData`` stores the data used for portfolio optimization.

    The data is kept as a pandas ``DataFrame`` in the ``portfolio_df`` attribute, with
    one row per asset. Accessor methods return individual columns as numpy arrays.
    """

    def __init__(
        self,
        portfolio_dataframe: DataFrame,
        columns_rename: dict[str, str] | None = None,
    ):
        """Creates a ``PortfolioData`` object from a pandas ``DataFrame``.

        The portfolio data is expected to contain at least the following columns names:

            - ``"asset"``: The name of the asset.
            - ``"outstanding_now"``: Current outstanding amount per asset.
            - ``"min_outstanding_future"``: Lower bound outstanding amount in the
              future per asset.
            - ``"max_outstanding_future"``: Upper bound outstanding amount in the
              future per asset.
            - ``"income_now"``: Current income per asset, corresponds to return
              multiplied by the current outstanding amount.
            - ``"regcap_now"``: Current regulatory capital per asset.

        Different column names in the dataset can be used but need to be provided as a
        renaming dictionary to the ``columns_rename`` argument.

        Args:
            portfolio_dataframe: Pandas ``DataFrame`` containing the portfolio data.
                The given object is not modified.
            columns_rename: Mapping used to rename columns, with the column names as
                they appear in the dataset as keys and the required column names as
                values. Example ``{"outstanding_2021": "outstanding_now"}``.

        Raises:
            ValueError: If required columns are not present in dataset.
        """
        if columns_rename is not None:
            # Rename into a new frame so the caller's DataFrame is left untouched.
            portfolio_dataframe = portfolio_dataframe.rename(columns=columns_rename)

        # Validate dataset to contain required column names
        for required_column_name in DEFAULT_REQUIRED_COLUMN_NAMES:
            if required_column_name not in portfolio_dataframe.columns:
                raise ValueError(
                    f"Required column name {required_column_name} is not in dataset."
                )

        self.portfolio_df = portfolio_dataframe

    @classmethod
    def from_file(
        cls: type[PortfolioDataT],
        filename: str | Path,
        columns_rename: dict[str, str] | None = None,
    ) -> PortfolioDataT:
        """Reads portfolio data from a file into a ``PortfolioData`` object.

        The portfolio data is expected to contain at least the following columns names:

            - ``"asset"``: The name of the asset.
            - ``"outstanding_now"``: Current outstanding amount per asset.
            - ``"min_outstanding_future"``: Lower bound outstanding amount in the
              future per asset.
            - ``"max_outstanding_future"``: Upper bound outstanding amount in the
              future per asset.
            - ``"income_now"``: Current income per asset, corresponds to return
              multiplied by the current outstanding amount.
            - ``"regcap_now"``: Current regulatory capital per asset.

        Different column names in the dataset can be used but need to be provided as a
        renaming dictionary to the ``columns_rename`` argument.

        Args:
            filename: Path to the portfolio data. Files ending in ``.xlsx``, ``.csv``
                or ``.json`` are supported (the extension is matched
                case-insensitively). If instead the string ``"benchmark_dataset"`` is
                provided, a default benchmark dataset containing 52 assets will be
                used.
            columns_rename: Mapping used to rename columns, with the column names as
                they appear in the dataset as keys and the required column names as
                values. Example ``{"outstanding_2021": "outstanding_now"}``.

        Returns:
            A new instance of the class this method is called on.

        Raises:
            ValueError: If the file extension is not supported or if required columns
                are not present in dataset.
        """
        if str(filename) == "benchmark_dataset":
            filename = Path(__file__).parents[1] / "datasets" / "benchmark_dataset.xlsx"
        filename = str(filename)

        # Only the extension check is case-insensitive; the path itself is passed to
        # pandas unchanged.
        lowered = filename.lower()
        if lowered.endswith(".xlsx"):
            portfolio_data = pd.read_excel(filename)
        elif lowered.endswith(".csv"):
            portfolio_data = pd.read_csv(filename)
        elif lowered.endswith(".json"):
            portfolio_data = pd.read_json(filename)
        else:
            raise ValueError("Datatype not supported.")
        return cls(portfolio_data, columns_rename)

    def get_outstanding_now(self) -> NDArray[np.float64]:
        """Gets the `outstanding_now` data from the dataset.

        Returns:
            The `outstanding_now` column from the dataset as a numpy array.
        """
        # ``np.float64`` is used instead of the alias ``np.float_``, which no longer
        # exists in NumPy 2.0 and would make this runtime ``cast`` call fail.
        return cast(
            NDArray[np.float64], self.portfolio_df["outstanding_now"].to_numpy()
        )

    def get_l_bound(self) -> NDArray[np.float64]:
        """Gets the `l_bound` data from the dataset.

        Returns:
            The `min_outstanding_future` column from the dataset as a numpy array.
        """
        return cast(
            NDArray[np.float64], self.portfolio_df["min_outstanding_future"].to_numpy()
        )

    def get_u_bound(self) -> NDArray[np.float64]:
        """Gets the `u_bound` data from the dataset.

        Returns:
            The `max_outstanding_future` column from the dataset as a numpy array.
        """
        return cast(
            NDArray[np.float64], self.portfolio_df["max_outstanding_future"].to_numpy()
        )

    def get_income(self) -> NDArray[np.float64]:
        """Gets the `income` data from the dataset.

        Returns:
            The `income_now` column from the dataset as a numpy array.
        """
        return cast(NDArray[np.float64], self.portfolio_df["income_now"].to_numpy())

    def get_capital(self) -> NDArray[np.float64]:
        """Gets the `capital` data from the dataset.

        Returns:
            The `regcap_now` column from the dataset as a numpy array.
        """
        return cast(NDArray[np.float64], self.portfolio_df["regcap_now"].to_numpy())

    def get_returns(self) -> NDArray[np.float64]:
        """Gets the `returns` data from the dataset.

        Returns:
            The returns per asset, defined as the element-wise quotient
            ``income_now / outstanding_now``.
        """
        income = self.get_income()
        outstanding_now = self.get_outstanding_now()
        return cast(NDArray[np.float64], income / outstanding_now)

    def get_column(self, column_name: str) -> NDArray[np.float64]:
        """Gets the specified column from the dataset.

        Args:
            column_name: Name of the column to get.

        Returns:
            The column named ``column_name`` from the dataset as a numpy array.
        """
        return cast(NDArray[np.float64], self.portfolio_df[column_name].to_numpy())

    def print_portfolio_info(self) -> None:
        """Prints information about portfolio data to terminal.

        The printed metrics are the total outstanding amount, the return on capital
        (ROC), the HHI diversification index, emission figures (only if the dataset
        has an ``"emis_intens_now"`` column), and estimates of the future total
        outstanding amount and growth factor with their standard deviations.
        """
        outstanding_now = self.get_outstanding_now()
        l_bound = self.get_l_bound()
        u_bound = self.get_u_bound()
        income = self.get_income()
        capital = self.get_capital()

        # Calculate the total outstanding amount in now
        total_outstanding_now = np.sum(outstanding_now)
        print(f"Total outstanding now: {total_outstanding_now:.2f}")

        # Calculate the ROC for now
        roc_now = np.sum(income) / np.sum(capital)
        print(f"ROC now: {roc_now:.4f}")

        # Calculate the HHI diversification for now. The numerator must be the sum of
        # the squared per-asset amounts; squaring the total would always give 1.
        hhi_now = np.sum(outstanding_now**2) / total_outstanding_now**2
        print(f"HHI now: {hhi_now:.4f}")

        if "emis_intens_now" in self.portfolio_df:
            # Calculate the total emissions for now: each asset's emission intensity
            # is weighted by that asset's own outstanding amount.
            total_emission_now = np.sum(
                self.get_column("emis_intens_now") * outstanding_now
            )
            print(f"Total Emission now: {total_emission_now:.2f}")

            # Calculate the average emission intensity now
            relative_total_emission = total_emission_now / total_outstanding_now
            print(f"Relative emission intensity now: {relative_total_emission:.2f}")

        # Estimate the total outstanding amount and its standard deviation for future.
        # This follows from the assumption of a symmetric probability distribution on
        # the interval [l_bound, u_bound] and the central limit theorem.
        expected_total_outstanding_future = np.sum(u_bound + l_bound) / 2
        expected_stddev_total_outstanding_future = np.linalg.norm(
            (u_bound - l_bound) / 2
        )
        print(
            f"Expected total outstanding future: {expected_total_outstanding_future:.2f}",
            f"Std dev: {expected_stddev_total_outstanding_future:.2f}",
        )

        # Estimate an average growth factor and its standard deviation for now-future.
        # This is the expected total outstanding amount in the future (the midpoints
        # of the bounds, summed over all assets) divided by the total amount for now.
        expected_average_growth_fac = np.sum(
            (u_bound + l_bound) / (2 * total_outstanding_now)
        )
        expected_stddev_average_growth_fac = np.linalg.norm(
            (u_bound - l_bound) / (2 * total_outstanding_now)
        )
        print(
            f"Expected average growth factor: {expected_average_growth_fac:.4f}",
            f"Std dev: {expected_stddev_average_growth_fac:.4f}",
        )

    def __len__(self) -> int:
        """Length of the dataset, i.e. the number of rows of ``portfolio_df``."""
        return len(self.portfolio_df)

    def __repr__(self) -> str:
        """Representation for debugging."""
        txt = f"{self.__class__.__name__} object containing the following data:\n"
        txt += repr(self.portfolio_df)
        return txt

    def __str__(self) -> str:
        """String representation of the ``PortfolioData`` object."""
        txt = f"{self.__class__.__name__} object containing the following data:\n"
        txt += str(self.portfolio_df)
        return txt

    def __contains__(self, other: object) -> bool:
        """Check if ``other`` is a column label of the dataset."""
        return other in self.portfolio_df