Source code for osos.api_pypi.api_pypi

# -*- coding: utf-8 -*-
"""
Interface module for pypi API
"""
import pypistats
import datetime
import numpy as np
import pandas as pd
import logging


# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


class Pypi:
    """Retrieve pypi download statistics and shape them into the
    osos usage-data format."""

    @staticmethod
    def get_data(name, include_mirrors=False):
        """Fetch the full pypistats download history for a package.

        Parameters
        ----------
        name : str
            pypi package name. Note that this should include the prefix
            for nrel packages e.g. reV -> nrel-rev
        include_mirrors : bool
            If True keep the "with_mirrors" rows of the pypistats table,
            otherwise keep the "without_mirrors" rows.

        Returns
        -------
        out : pd.DataFrame
            DataFrame of pypistats data for the last 180 days, indexed by
            date (sorted ascending), with columns "pypi_daily" and
            "pypi_180_cumulative". The cumulative column is a running sum
            of "pypi_daily" that starts at the first date in the table,
            i.e. the 180 day window is anchored to today's actual date and
            not to the date of each output row.

        Raises
        ------
        RuntimeError
            If the pypistats request fails for any reason. The original
            exception is logged and chained as the cause.
        """
        try:
            stats = pypistats.overall(name, total=True, format="pandas")
        except Exception as e:
            template = ('Could not get pypi stats for package "{}", '
                        'received the following exception: {}')
            msg = template.format(name, e)
            logger.exception(msg)
            raise RuntimeError(msg) from e

        # The last row returned by pypistats is a totals row; it is not
        # needed here.
        stats = stats.iloc[:-1]

        category = 'with_mirrors' if include_mirrors else 'without_mirrors'
        stats = stats[stats['category'] == category]

        # Index by calendar date so callers can join on datetime.date values.
        stats.index = pd.to_datetime(stats['date']).dt.date
        stats = (stats.sort_index()
                 .drop(['category', 'percent', 'date'], axis=1)
                 .rename({'downloads': 'pypi_daily'}, axis=1))

        stats['pypi_180_cumulative'] = np.cumsum(stats['pypi_daily'])
        return stats

    @classmethod
    def get_daily_data(cls, name, dates, include_mirrors=False):
        """Get pypi usage data for one or more specific dates.

        Parameters
        ----------
        name : str
            pypi package name. Note that this should include the prefix
            for nrel packages e.g. reV -> nrel-rev
        dates : datetime.date | list
            One or more dates to retrieve data for
        include_mirrors : bool
            Flag to include mirror downloads or not

        Returns
        -------
        out : pd.DataFrame
            Integer (int64) DataFrame whose index is the sorted "dates"
            input, with columns "pypi_daily" and "pypi_180_cumulative".
            Requested dates with no pypistats record get a "pypi_daily"
            of 0 and take their cumulative value from the nearest earlier
            record (or, failing that, the nearest later one). Note that
            the 180 day cumulative is counted over the last 180 days from
            today's actual date, not 180 days from the date in the output
            row index.
        """
        logger.debug(f'Getting daily pypi data for "{name}"')
        logger.debug(f'Including mirrors: {include_mirrors}')

        history = cls.get_data(name, include_mirrors=include_mirrors)

        # Accept a bare date as shorthand for a one-element list.
        requested = [dates] if isinstance(dates, datetime.date) else dates

        # Left-join onto the requested dates so every one of them gets a row.
        daily = pd.DataFrame(index=sorted(requested)).join(history)
        daily['pypi_daily'] = daily['pypi_daily'].fillna(0)
        daily['pypi_180_cumulative'] = (
            daily['pypi_180_cumulative'].ffill().bfill())

        # Anything still missing (e.g. no overlap at all) becomes zero
        # before the integer cast.
        return daily.fillna(0).astype(np.int64)