Source code for pyobo.api.properties

"""High-level API for properties."""

import logging
from collections.abc import Mapping

import pandas as pd
from tqdm import tqdm
from typing_extensions import Unpack

from .utils import get_version_from_kwargs
from ..constants import (
    GetOntologyKwargs,
    check_should_cache,
    check_should_force,
    check_should_use_tqdm,
)
from ..getters import get_ontology
from ..identifier_utils import wrap_norm_prefix
from ..struct import Reference
from ..struct.struct_utils import OBOLiteral, ReferenceHint, _ensure_ref
from ..utils.cache import cached_df
from ..utils.io import multidict
from ..utils.path import CacheArtifact, get_cache_path

__all__ = [
    "get_filtered_properties_df",
    "get_filtered_properties_mapping",
    "get_filtered_properties_multimapping",
    "get_literal_properties",
    "get_literal_properties_df",
    "get_object_properties",
    "get_object_properties_df",
    "get_properties",
    "get_properties_df",
    "get_property",
]

logger = logging.getLogger(__name__)


[docs] def get_object_properties_df(prefix, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame: """Get a dataframe of object property triples.""" version = get_version_from_kwargs(prefix, kwargs) path = get_cache_path(prefix, CacheArtifact.object_properties, version=version) @cached_df( path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs) ) def _df_getter() -> pd.DataFrame: return get_ontology(prefix, **kwargs).get_object_properties_df( use_tqdm=check_should_use_tqdm(kwargs) ) return _df_getter()
[docs] def get_object_properties( prefix, **kwargs: Unpack[GetOntologyKwargs] ) -> list[tuple[Reference, Reference, Reference]]: """Get a list of object property triples.""" df = get_object_properties_df(prefix, **kwargs) return [ (Reference.from_curie(s), Reference.from_curie(p), Reference.from_curie(o)) for s, p, o in df.values ]
[docs] def get_literal_properties( prefix: str, **kwargs: Unpack[GetOntologyKwargs] ) -> list[tuple[Reference, Reference, OBOLiteral]]: """Get a list of literal property triples.""" df = get_literal_properties_df(prefix, **kwargs) return [ ( Reference.from_curie(s), Reference.from_curie(p), OBOLiteral( value, Reference.from_curie(datatype), language if language and pd.notna(language) else None, ), ) for s, p, value, datatype, language in tqdm( df.values, desc=f"[{prefix}] parsing properties", unit_scale=True, unit="triple", disable=not check_should_use_tqdm(kwargs), ) ]
[docs] def get_literal_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame: """Get a dataframe of literal property quads.""" version = get_version_from_kwargs(prefix, kwargs) path = get_cache_path(prefix, CacheArtifact.literal_properties, version=version) @cached_df( path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs) ) def _df_getter() -> pd.DataFrame: return get_ontology(prefix, **kwargs).get_literal_properties_df( use_tqdm=check_should_use_tqdm(kwargs) ) return _df_getter()
[docs] @wrap_norm_prefix def get_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame: """Extract properties. :param prefix: the resource to load :returns: A dataframe with the properties """ df1 = get_literal_properties_df(prefix, **kwargs) df2 = get_object_properties_df(prefix, **kwargs) df = pd.concat([df1[["source", "predicate", "target"]], df2]) ll = len(prefix) + 1 df[f"{prefix}_id"] = df["source"].map(lambda x: x[ll:]) df = df.rename(columns={"predicate": "property", "target": "value"}) del df["source"] return df[[f"{prefix}_id", "property", "value"]]
[docs] @wrap_norm_prefix def get_filtered_properties_mapping( prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs] ) -> Mapping[str, str]: """Extract a single property for each term as a dictionary. :param prefix: the resource to load :param prop: the property to extract :returns: A mapping from identifier to property value """ df = get_filtered_properties_df(prefix, prop, **kwargs) return dict(df.values)
[docs] @wrap_norm_prefix def get_filtered_properties_multimapping( prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs] ) -> Mapping[str, list[str]]: """Extract multiple properties for each term as a dictionary. :param prefix: the resource to load :param prop: the property to extract :returns: A mapping from identifier to property values """ df = get_filtered_properties_df(prefix, prop, **kwargs) return multidict(df.values)
[docs] def get_property( prefix: str, identifier: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs] ) -> str | None: """Extract a single property for the given entity. :param prefix: the resource to load :param identifier: the identifier withing the resource :param prop: the property to extract :returns: The single value for the property. If multiple are expected, use :func:`get_properties` >>> import pyobo >>> pyobo.get_property("chebi", "132964", "http://purl.obolibrary.org/obo/chebi/smiles") "C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F" """ filtered_properties_mapping = get_filtered_properties_mapping( prefix=prefix, prop=prop, **kwargs ) return filtered_properties_mapping.get(identifier)
[docs] def get_properties( prefix: str, identifier: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs], ) -> list[str] | None: """Extract a set of properties for the given entity. :param prefix: the resource to load :param identifier: the identifier withing the resource :param prop: the property to extract :returns: Multiple values for the property. If only one is expected, use :func:`get_property` """ filtered_properties_multimapping = get_filtered_properties_multimapping( prefix=prefix, prop=prop, **kwargs ) return filtered_properties_multimapping.get(identifier)
[docs] @wrap_norm_prefix def get_filtered_properties_df( prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs] ) -> pd.DataFrame: """Extract a single property for each term. :param prefix: the resource to load :param prop: the property to extract :returns: A dataframe from identifier to property value. Columns are [<prefix>_id, value]. """ prop = _ensure_ref(prop, ontology_prefix=prefix) df = get_properties_df(prefix, **kwargs) df = df.loc[df["property"] == prop.curie, [f"{prefix}_id", "value"]] return df