Source code for ridgeplot.datasets

from __future__ import annotations

import sys
from typing import TYPE_CHECKING

if sys.version_info >= (3, 10):
    from importlib.resources import files
else:
    from importlib_resources import files

if TYPE_CHECKING:
    from typing import Literal

import pandas as pd

__all__ = [
    "load_probly",
    "load_lincoln_weather",
]


_DATA_DIR = files("ridgeplot.datasets.data")


[docs] def load_probly( version: Literal["zonination", "wadefagen", "illinois"] = "zonination", ) -> pd.DataFrame: """Load a version of the "Perception of Probability Words" (a.k.a., *"probly"*) dataset. Parameters ---------- version : {'zonination', 'wadefagen', 'illinois'}, default: 'zonination' The version of the dataset to load. Each version is slightly different and originates from different surveys. See the `Notes`_ section for more details on each version. Returns ------- :class:`pandas.DataFrame` A dataframe containing a *probly* dataset. Notes ----- .. _Notes: Sherman Kent, a CIA analyst, first published his work on the perception of probabilistic words in 1964 [1]_. This exercise has been repeated several times since then. This function provides three different versions of the dataset, each originating from a different survey. Valid options for the ``version`` parameter are: ``"zonination"`` This is perhaps the most popular version of the dataset and originates from a survey conducted by the Reddit user `/u/zonination`_. .. collapse:: <i>Dataset details...</i> .. list-table:: :stub-columns: 1 :align: left * - Creator - :gh-user:`zonination` * - Source - https://raw.githubusercontent.com/zonination/perceptions/51207062aa173777264d3acce0131e1e2456d966/probly.csv * - Accessed on - 2023-06-24 ``"wadefagen"`` This version of the dataset originates from a blogpost by Wade Fagen-Ulmschneider from the University of Illinois [2]_. It is based on a survey conducted on different social media platforms. .. collapse:: <i>Dataset details...</i> .. list-table:: :stub-columns: 1 :align: left * - Creator - Wade Fagen-Ulmschneider (:gh-user:`wadefagen`) * - Source - https://raw.githubusercontent.com/wadefagen/datasets/7e752937b72edc3126e3dd17e3cd97eb727af8f9/Perception-of-Probability-Words/survey-results.csv * - Accessed on - 2023-06-24 ``"illinois"`` This version of the dataset originates from a survey of primarily undergraduate students conducted at The University of Illinois [3]_. .. collapse:: <i>Dataset details...</i> .. list-table:: :stub-columns: 1 :align: left * - Creator - University of Illinois * - Source - https://waf.cs.illinois.edu/discovery/words.csv * - Accessed on - 2023-06-24 References ---------- .. [1] Sherman Kent. (1964). *"Words of estimative probability"*. https://www.cia.gov/static/Words-of-Estimative-Probability.pdf .. [2] Wade Fagen-Ulmschneider. *"Perception of Probability Words"*. https://waf.cs.illinois.edu/visualizations/Perception-of-Probability-Words/ .. [3] University of Illinois. *"Perception of Probability Words Dataset"*. https://discovery.cs.illinois.edu/dataset/words/ .. _/u/zonination: https://www.reddit.com/user/zonination """ versions = { "zonination": "probly-zonination.csv", "wadefagen": "probly-wadefagen.csv", "illinois": "probly-illinois.csv", } if version not in versions: raise ValueError( f"Unknown version {version!r} for the probly dataset. " f"Valid versions are {list(versions.keys())}." ) return pd.read_csv(_DATA_DIR / versions[version])
[docs] def load_lincoln_weather() -> pd.DataFrame: """Load the "Weather in Lincoln, Nebraska in 2016" dataset. Returns ------- :class:`pandas.DataFrame` A dataframe containing the "Lincoln Weather" dataset. Notes ----- The version of the dataset included in this package is the same version included in the `ggridges` R package [1]_. The dataset contains weather information from Lincoln, Nebraska (2016). The original data was taken from a blogpost by Austin Wehrwein in 2017 [2]_. .. collapse:: <i>Details...</i> .. list-table:: :stub-columns: 1 :align: left * - Source - https://raw.githubusercontent.com/wilkelab/ggridges/543a092c601b92d7b62e630fb34d038f54485a29/data-raw/lincoln-weather.csv * - Accessed on - 2023-08-07 References ---------- .. [1] ggridges. *"Weather in Lincoln, Nebraska in 2016"*. https://wilkelab.org/ggridges/reference/lincoln_weather.html .. [2] Austin Wehrwein. *"Plot inspiration via FiveThirtyEight"*. https://austinwehrwein.com/data-visualization/plot-inspiration-via-fivethirtyeight/ """ data = pd.read_csv(_DATA_DIR / "lincoln-weather.csv", index_col="CST") data.index = pd.to_datetime(data.index.to_list()) return data