Source code for pyobo.api.relations

"""High-level API for relations."""

import logging
from collections.abc import Mapping
from functools import lru_cache

import pandas as pd
from typing_extensions import Unpack

from .utils import get_version_from_kwargs
from ..constants import (
    RELATION_COLUMNS,
    RELATION_ID,
    RELATION_PREFIX,
    SOURCE_ID,
    SOURCE_PREFIX,
    TARGET_ID,
    TARGET_PREFIX,
    GetOntologyKwargs,
    check_should_cache,
    check_should_force,
    check_should_use_tqdm,
)
from ..getters import get_ontology
from ..identifier_utils import wrap_norm_prefix
from ..struct.reference import Reference
from ..struct.struct_utils import ReferenceHint, _ensure_ref
from ..utils.cache import cached_df
from ..utils.path import CacheArtifact, get_cache_path, get_relation_cache_path

__all__ = [
    "get_filtered_relations_df",
    "get_id_multirelations_mapping",
    "get_relation",
    "get_relation_mapping",
    "get_relations",
    "get_relations_df",
]

logger = logging.getLogger(__name__)


@wrap_norm_prefix
def get_relations(
    prefix: str, **kwargs: Unpack[GetOntologyKwargs]
) -> list[tuple[Reference, Reference, Reference]]:
    """Get the relations of an ontology as (source, relation, target) reference triples.

    :param prefix: The prefix of the ontology, which is also used as the prefix
        of every source reference
    :param kwargs: Keyword arguments forwarded to :func:`get_relations_df`
    :returns: A list with one triple of references per row of the (non-wide)
        relations dataframe
    """
    relations_df = get_relations_df(prefix, wide=False, **kwargs)
    triples: list[tuple[Reference, Reference, Reference]] = []
    # Each row of the non-wide dataframe is unpacked positionally into five fields
    for row in relations_df.values:
        source_id, relation_prefix, relation_id, target_prefix, target_id = row
        source = Reference(prefix=prefix, identifier=source_id)
        predicate = Reference(prefix=relation_prefix, identifier=relation_id)
        target = Reference(prefix=target_prefix, identifier=target_id)
        triples.append((source, predicate, target))
    return triples


@wrap_norm_prefix
def get_relations_df(
    prefix: str, *, wide: bool = False, **kwargs: Unpack[GetOntologyKwargs]
) -> pd.DataFrame:
    """Get the relations of an ontology as a dataframe, going through the relations cache.

    :param prefix: The prefix of the ontology
    :param wide: If true, rename the ``{prefix}_id`` column to the source identifier
        column, add a source prefix column filled with ``prefix``, and select the
        columns listed in ``RELATION_COLUMNS``
    :param kwargs: Keyword arguments forwarded to :func:`get_ontology`; the version
        and the force/cache/tqdm settings are also derived from them
    :returns: The relations dataframe, in wide form if requested
    """
    cache_path = get_cache_path(
        prefix,
        CacheArtifact.relations,
        version=get_version_from_kwargs(prefix, kwargs),
    )
    should_force = check_should_force(kwargs)
    should_cache = check_should_cache(kwargs)

    @cached_df(path=cache_path, dtype=str, force=should_force, cache=should_cache)
    def _df_getter() -> pd.DataFrame:
        return get_ontology(prefix, **kwargs).get_relations_df(
            use_tqdm=check_should_use_tqdm(kwargs)
        )

    relations = _df_getter()
    if not wide:
        return relations

    # rename() returns a new dataframe, so adding the column below
    # does not touch the dataframe produced by the cached getter
    wide_relations = relations.rename(columns={f"{prefix}_id": SOURCE_ID})
    wide_relations[SOURCE_PREFIX] = prefix
    return wide_relations[RELATION_COLUMNS]
@wrap_norm_prefix
def get_filtered_relations_df(
    prefix: str,
    relation: ReferenceHint,
    **kwargs: Unpack[GetOntologyKwargs],
) -> pd.DataFrame:
    """Get the relations of a single type from an ontology as a dataframe.

    If the cache file holding *all* relations of the ontology already exists, it is
    read and filtered down to the requested relation. Otherwise, a dataframe for
    just this relation is obtained through its own cache file, falling back to
    loading the ontology.

    :param prefix: The prefix of the ontology
    :param relation: The relation to keep; it is resolved with ``prefix`` as the
        ontology prefix before use
    :param kwargs: Keyword arguments forwarded to :func:`get_ontology`; the version
        and the force/cache/tqdm settings are also derived from them
    :returns: A dataframe of the matching relations. When read from the
        all-relations cache, it has the ``{prefix}_id``, target prefix, and target
        identifier columns.
    """
    relation = _ensure_ref(relation, ontology_prefix=prefix)
    version = get_version_from_kwargs(prefix, kwargs)
    all_relations_path = get_cache_path(prefix, CacheArtifact.relations, version=version)
    # NOTE: this shortcut is taken whenever the file exists, before the
    # force/cache settings in kwargs are consulted
    if all_relations_path.is_file():
        logger.debug("[%s] loading all relations from %s", prefix, all_relations_path)
        df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
        idx = (df[RELATION_PREFIX] == relation.prefix) & (df[RELATION_ID] == relation.identifier)
        columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
        return df.loc[idx, columns]

    path = get_relation_cache_path(prefix, relation, version=version)

    @cached_df(
        path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
    )
    def _df_getter() -> pd.DataFrame:
        logger.info("[%s] no cached relations found. getting from OBO loader", prefix)
        ontology = get_ontology(prefix, **kwargs)
        return ontology.get_filtered_relations_df(relation, use_tqdm=check_should_use_tqdm(kwargs))

    return _df_getter()
@wrap_norm_prefix
def get_id_multirelations_mapping(
    prefix: str,
    typedef: ReferenceHint,
    **kwargs: Unpack[GetOntologyKwargs],
) -> Mapping[str, list[Reference]]:
    """Get a mapping from identifiers to lists of references for the given typedef.

    :param prefix: The prefix of the ontology
    :param typedef: The relation (type definition) to look up
    :param kwargs: Keyword arguments forwarded to :func:`get_ontology`, after the
        version has been resolved and stored in them
    :returns: The result of the ontology's own ``get_id_multirelations_mapping``
    """
    # Pin the resolved version so the ontology getter receives it explicitly
    kwargs.update(version=get_version_from_kwargs(prefix, kwargs))
    loaded = get_ontology(prefix, **kwargs)
    show_progress = check_should_use_tqdm(kwargs)
    return loaded.get_id_multirelations_mapping(typedef=typedef, use_tqdm=show_progress)
@lru_cache
@wrap_norm_prefix
def get_relation_mapping(
    prefix: str,
    relation: ReferenceHint,
    target_prefix: str,
    **kwargs: Unpack[GetOntologyKwargs],
) -> Mapping[str, str]:
    """Get a mapping from source identifiers to target identifiers for the given relation.

    Results are memoized with :func:`functools.lru_cache`, so all arguments
    must be hashable.

    .. warning:: Assumes there's only one version of the property for each term.

    Example usage: get homology between HGNC and MGI:

    >>> import pyobo
    >>> human_mapt_hgnc_id = "6893"
    >>> mouse_mapt_mgi_id = "97180"
    >>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping("hgnc", "ro:HOM0000017", "mgi")
    >>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]

    :param prefix: The prefix of the source ontology
    :param relation: The relation to follow
    :param target_prefix: The prefix of the targets to keep
    :param kwargs: Keyword arguments forwarded to :func:`get_ontology`
    :returns: The result of the ontology's own ``get_relation_mapping``
    """
    loaded = get_ontology(prefix, **kwargs)
    show_progress = check_should_use_tqdm(kwargs)
    return loaded.get_relation_mapping(
        relation=relation,
        target_prefix=target_prefix,
        use_tqdm=show_progress,
    )
@wrap_norm_prefix
def get_relation(
    prefix: str,
    source_identifier: str,
    relation: ReferenceHint,
    target_prefix: str,
    **kwargs: Unpack[GetOntologyKwargs],
) -> str | None:
    """Get the target identifier related to a source identifier by the given relation.

    .. warning:: Assumes there's only one version of the property for each term.

    Example usage: get homology between MAPT in HGNC and MGI:

    >>> import pyobo
    >>> human_mapt_hgnc_id = "6893"
    >>> mouse_mapt_mgi_id = "97180"
    >>> assert mouse_mapt_mgi_id == pyobo.get_relation(
    ...     "hgnc", human_mapt_hgnc_id, "ro:HOM0000017", "mgi"
    ... )

    :param prefix: The prefix of the source ontology
    :param source_identifier: The identifier to look up in the relation mapping
    :param relation: The relation to follow
    :param target_prefix: The prefix of the target
    :param kwargs: Keyword arguments forwarded to :func:`get_relation_mapping`
    :returns: The target identifier, or ``None`` if the source identifier is not
        in the mapping
    """
    source_to_target = get_relation_mapping(
        prefix=prefix,
        relation=relation,
        target_prefix=target_prefix,
        **kwargs,
    )
    target_identifier = source_to_target.get(source_identifier)
    return target_identifier