# Source code for pyobo.api.properties

# -*- coding: utf-8 -*-

"""High-level API for properties."""

import logging
import os
from typing import List, Mapping, Optional

import pandas as pd

from .utils import get_version
from ..getters import get_ontology
from ..identifier_utils import wrap_norm_prefix
from ..utils.cache import cached_df, cached_mapping, cached_multidict
from ..utils.io import multidict
from ..utils.path import prefix_cache_join

# Public API of this module: the names exported by a star-import.
__all__ = [
    "get_properties_df",
    "get_filtered_properties_df",
    "get_filtered_properties_mapping",
    "get_filtered_properties_multimapping",
    "get_property",
    "get_properties",
]

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


@wrap_norm_prefix
def get_properties_df(prefix: str, *, force: bool = False) -> pd.DataFrame:
    """Get all properties of a resource as a dataframe.

    The dataframe is produced through :func:`cached_df` using the
    ``properties.tsv`` path for the prefix and its current version. When the
    loader below runs, the ontology is loaded and rows containing missing
    values are dropped.

    :param prefix: the resource to load
    :param force: should the resource be re-downloaded, re-parsed, and re-cached?
    :returns: A dataframe with the properties
    """
    version = get_version(prefix)
    path = prefix_cache_join(prefix, name="properties.tsv", version=version)

    def _load_from_ontology() -> pd.DataFrame:
        # Pick the log message up front; the prefix is passed lazily to the logger.
        message = (
            "[%s] forcing reload for properties"
            if force
            else "[%s] no cached properties found. getting from OBO loader"
        )
        logger.info(message, prefix)
        properties = get_ontology(prefix, force=force, version=version).get_properties_df()
        # Drop incomplete rows in place before handing the frame to the cache wrapper.
        properties.dropna(inplace=True)
        return properties

    # Equivalent to decorating the loader with ``@cached_df(...)`` and calling it.
    cached_loader = cached_df(path=path, dtype=str, force=force)(_load_from_ontology)
    return cached_loader()
@wrap_norm_prefix
def get_filtered_properties_mapping(
    prefix: str,
    prop: str,
    *,
    use_tqdm: bool = False,
    force: bool = False,
) -> Mapping[str, str]:
    """Extract a single property for each term as a dictionary.

    If the all-properties file (``properties.tsv``) for the prefix already
    exists, it is filtered for ``prop``; otherwise the ontology is loaded and
    queried directly. The result is wrapped by :func:`cached_mapping` using a
    per-property TSV path.

    :param prefix: the resource to load
    :param prop: the property to extract
    :param use_tqdm: should a progress bar be shown?
    :param force: should the resource be re-downloaded, re-parsed, and re-cached?
    :returns: A mapping from identifier to property value
    """
    version = get_version(prefix)
    path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
    all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)

    @cached_mapping(path=path, header=[f"{prefix}_id", prop], force=force)
    def _mapping_getter() -> Mapping[str, str]:
        # NOTE: an existing properties.tsv is used even when ``force`` is set;
        # ``force`` only reaches the ontology loader when that file is absent.
        if os.path.exists(all_properties_path):
            logger.info("[%s] loading pre-cached properties", prefix)
            # Read everything as strings: without dtype=str pandas infers numeric
            # dtypes, turning identifiers such as "00123" into the integer 123 and
            # making string lookups on the resulting mapping miss.
            df = pd.read_csv(all_properties_path, sep="\t", dtype=str)
            logger.info("[%s] filtering pre-cached properties", prefix)
            df = df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
            # Each row is an (identifier, value) pair.
            return dict(df.values)

        logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
        ontology = get_ontology(prefix, force=force, version=version)
        return ontology.get_filtered_properties_mapping(prop, use_tqdm=use_tqdm)

    return _mapping_getter()
@wrap_norm_prefix
def get_filtered_properties_multimapping(
    prefix: str,
    prop: str,
    *,
    use_tqdm: bool = False,
    force: bool = False,
) -> Mapping[str, List[str]]:
    """Extract multiple properties for each term as a dictionary.

    If the all-properties file (``properties.tsv``) for the prefix already
    exists, it is filtered for ``prop``; otherwise the ontology is loaded and
    queried directly. The result is wrapped by :func:`cached_multidict` using a
    per-property TSV path.

    :param prefix: the resource to load
    :param prop: the property to extract
    :param use_tqdm: should a progress bar be shown?
    :param force: should the resource be re-downloaded, re-parsed, and re-cached?
    :returns: A mapping from identifier to property values
    """
    version = get_version(prefix)
    path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
    all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)

    @cached_multidict(path=path, header=[f"{prefix}_id", prop], force=force)
    def _mapping_getter() -> Mapping[str, List[str]]:
        # NOTE: an existing properties.tsv is used even when ``force`` is set;
        # ``force`` only reaches the ontology loader when that file is absent.
        if os.path.exists(all_properties_path):
            logger.info("[%s] loading pre-cached properties", prefix)
            # Read everything as strings: without dtype=str pandas infers numeric
            # dtypes, turning identifiers such as "00123" into the integer 123 and
            # making string lookups on the resulting mapping miss.
            df = pd.read_csv(all_properties_path, sep="\t", dtype=str)
            logger.info("[%s] filtering pre-cached properties", prefix)
            df = df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
            # Each row is an (identifier, value) pair; multidict groups them.
            return multidict(df.values)

        logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
        ontology = get_ontology(prefix, force=force, version=version)
        return ontology.get_filtered_properties_multimapping(prop, use_tqdm=use_tqdm)

    return _mapping_getter()
def get_property(prefix: str, identifier: str, prop: str) -> Optional[str]:
    """Look up one property value for a single entity.

    :param prefix: the resource to load
    :param identifier: the identifier within the resource
    :param prop: the property to extract
    :returns: The single value for the property, or ``None`` if the identifier
        has no entry. If multiple values are expected, use :func:`get_properties`

    >>> import pyobo
    >>> pyobo.get_property('chebi', '132964', 'http://purl.obolibrary.org/obo/chebi/smiles')
    "C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F"
    """
    # Build (or load) the identifier -> value mapping, then look up the entity.
    return get_filtered_properties_mapping(prefix=prefix, prop=prop).get(identifier)
def get_properties(prefix: str, identifier: str, prop: str) -> Optional[List[str]]:
    """Look up all values of one property for a single entity.

    :param prefix: the resource to load
    :param identifier: the identifier within the resource
    :param prop: the property to extract
    :returns: Multiple values for the property, or ``None`` if the identifier
        has no entry. If only one value is expected, use :func:`get_property`
    """
    # Build (or load) the identifier -> values mapping, then look up the entity.
    values_by_identifier = get_filtered_properties_multimapping(prefix=prefix, prop=prop)
    return values_by_identifier.get(identifier)
@wrap_norm_prefix
def get_filtered_properties_df(
    prefix: str,
    prop: str,
    *,
    use_tqdm: bool = False,
    force: bool = False,
) -> pd.DataFrame:
    """Extract a single property for each term.

    If the all-properties file (``properties.tsv``) for the prefix already
    exists, it is filtered for ``prop``; otherwise the ontology is loaded and
    queried directly. The result is wrapped by :func:`cached_df` using a
    per-property TSV path.

    :param prefix: the resource to load
    :param prop: the property to extract
    :param use_tqdm: should a progress bar be shown?
    :param force: should the resource be re-downloaded, re-parsed, and re-cached?
    :returns: A dataframe from identifier to property value. Columns are [<prefix>_id, value].
    """
    version = get_version(prefix)
    path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
    all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)

    @cached_df(path=path, dtype=str, force=force)
    def _df_getter() -> pd.DataFrame:
        # NOTE: an existing properties.tsv is used even when ``force`` is set;
        # the "forcing reload" branch below is only reached when it is absent.
        if os.path.exists(all_properties_path):
            logger.info("[%s] loading pre-cached properties", prefix)
            # Read everything as strings so the freshly computed frame has the
            # same dtypes as the one the cache wrapper is configured with
            # (dtype=str), and identifiers keep leading zeros.
            df = pd.read_csv(all_properties_path, sep="\t", dtype=str)
            logger.info("[%s] filtering pre-cached properties", prefix)
            return df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]

        if force:
            logger.info("[%s] forcing reload for properties", prefix)
        else:
            logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
        ontology = get_ontology(prefix, force=force, version=version)
        return ontology.get_filtered_properties_df(prop, use_tqdm=use_tqdm)

    return _df_getter()