# Source code for pyobo.utils.path

"""Utilities for building paths."""

from __future__ import annotations

import enum
import json
import logging
from pathlib import Path
from typing import TYPE_CHECKING, Any

import pandas as pd
from curies import Reference
from pystow import VersionHint
from pystow.utils.download import DownloadKwargs
from typing_extensions import Unpack

from ..constants import CACHE_SUBDIRECTORY_NAME, RAW_MODULE, RELATION_SUBDIRECTORY_NAME

if TYPE_CHECKING:
    from pandas._typing import DtypeArg

__all__ = [
    "CacheArtifact",
    "ensure_df",
    "ensure_json",
    "ensure_path",
    "get_cache_path",
    "get_relation_cache_path",
    "join_path",
    "prefix_directory_join",
]

logger = logging.getLogger(__name__)


def prefix_directory_join(
    prefix: str,
    *parts: str,
    name: str | None = None,
    version: VersionHint = None,
    ensure_exists: bool = True,
) -> Path:
    """Build a path inside the storage module that belongs to a prefix.

    :param prefix: The prefix whose sub-module of ``RAW_MODULE`` is used.
    :param parts: Path components, forwarded positionally to the module's ``join``.
    :param name: Optional file name, forwarded to ``join``.
    :param version: Optional version hint, forwarded to ``join``.
    :param ensure_exists: Forwarded to ``join`` unchanged.
    :returns: The path produced by the module's ``join``.
    """
    prefix_module = RAW_MODULE.module(prefix)
    return prefix_module.join(*parts, name=name, version=version, ensure_exists=ensure_exists)


def join_path(
    prefix: str,
    *parts: str,
    version: VersionHint = None,
    name: str | None = None,
) -> Path:
    """Build a path inside the storage module that belongs to a prefix.

    This only calls ``join`` on the prefix's module; no URL is involved and
    nothing is downloaded here.

    :param prefix: The prefix whose sub-module of ``RAW_MODULE`` is used.
    :param parts: Path components, forwarded positionally to the module's ``join``.
    :param version: Optional version hint, forwarded to ``join``.
    :param name: Optional file name, forwarded to ``join``.
    :returns: The path produced by the module's ``join``.
    """
    return RAW_MODULE.module(prefix).join(*parts, name=name, version=version)


def ensure_path(
    prefix: str,
    *parts: str,
    url: str,
    version: VersionHint = None,
    name: str | None = None,
    force: bool = False,
    **download_kwargs: Unpack[DownloadKwargs],
) -> Path:
    """Download a file if it doesn't exist.

    :param prefix: The prefix whose sub-module of ``RAW_MODULE`` is used.
    :param parts: Path components, forwarded positionally to the module's ``ensure``.
    :param url: The URL of the file, forwarded to ``ensure``.
    :param version: Optional version hint, forwarded to ``ensure``.
    :param name: Optional file name, forwarded to ``ensure``.
    :param force: Forwarded to ``ensure`` unchanged.
    :param download_kwargs: Extra keyword arguments, collected into a single
        dictionary and handed to ``ensure`` as its ``download_kwargs`` argument.
    :returns: The path returned by the module's ``ensure``.
    """
    return RAW_MODULE.module(prefix).ensure(
        *parts,
        url=url,
        name=name,
        force=force,
        version=version,
        download_kwargs=download_kwargs,
    )


def ensure_df(
    prefix: str,
    *parts: str,
    url: str,
    version: VersionHint = None,
    name: str | None = None,
    force: bool = False,
    sep: str = "\t",
    dtype: DtypeArg | None = str,
    download_kwargs: DownloadKwargs | None = None,
    **kwargs: Any,
) -> pd.DataFrame:
    """Download a file and open as a dataframe.

    :param prefix: The prefix, forwarded to :func:`ensure_path`.
    :param parts: Path components, forwarded to :func:`ensure_path`.
    :param url: The URL of the file, forwarded to :func:`ensure_path`.
    :param version: Optional version hint, forwarded to :func:`ensure_path`.
    :param name: Optional file name, forwarded to :func:`ensure_path`.
    :param force: Forwarded to :func:`ensure_path`.
    :param sep: Column separator given to :func:`pandas.read_csv`; a tab by default.
    :param dtype: Column dtype given to :func:`pandas.read_csv`; ``str`` by default.
    :param download_kwargs: Optional dictionary that is unpacked into the
        keyword arguments of :func:`ensure_path`.
    :param kwargs: Additional keyword arguments for :func:`pandas.read_csv`.
    :returns: The dataframe read from the ensured path.
    """
    _path = ensure_path(
        prefix,
        *parts,
        url=url,
        version=version,
        name=name,
        force=force,
        **(download_kwargs or {}),
    )
    return pd.read_csv(_path, sep=sep, dtype=dtype, **kwargs)


def ensure_json(
    prefix: str,
    *parts: str,
    url: str,
    version: VersionHint = None,
    name: str | None = None,
    force: bool = False,
    **kwargs: Unpack[DownloadKwargs],
) -> Any:
    """Download a file and open as JSON.

    :param prefix: The prefix, forwarded to :func:`ensure_path`.
    :param parts: Path components, forwarded to :func:`ensure_path`.
    :param url: The URL of the file, forwarded to :func:`ensure_path`.
    :param version: Optional version hint, forwarded to :func:`ensure_path`.
    :param name: Optional file name, forwarded to :func:`ensure_path`.
    :param force: Forwarded to :func:`ensure_path`.
    :param kwargs: Extra keyword arguments, forwarded to :func:`ensure_path`.
    :returns: The object parsed from the file by :func:`json.load`.
    """
    _path = ensure_path(
        prefix,
        *parts,
        url=url,
        version=version,
        name=name,
        # ``force`` used to be accepted but dropped here, so callers could not
        # influence the download the way they can through ``ensure_df``.
        force=force,
        **kwargs,
    )
    with _path.open() as file:
        return json.load(file)


class CacheArtifact(enum.Enum):
    """An enumeration of the file names used for cached artifacts.

    Each member's value is passed as the file name by :func:`get_cache_path`.
    """

    names = "names.tsv.gz"
    definitions = "definitions.tsv.gz"
    species = "species.tsv.gz"
    mappings = "mappings.sssom.tsv.gz"
    relations = "relations.tsv.gz"
    alts = "alt_ids.tsv.gz"
    typedefs = "typedefs.tsv.gz"
    literal_mappings = "literal_mappings.tsv.gz"
    references = "references.tsv.gz"
    obsoletes = "obsolete.tsv.gz"
    literal_properties = "literal_properties.tsv.gz"
    object_properties = "object_properties.tsv.gz"
    nodes = "nodes.tsv.gz"
    edges = "edges.tsv.gz"
    prefixes = "prefixes.json"
    metadata = "metadata.json"
    embeddings = "embeddings.tsv.gz"


def get_cache_path(
    ontology: str,
    name: CacheArtifact,
    *,
    version: str | None = None,
) -> Path:
    """Get a cache path.

    :param ontology: The prefix, forwarded to :func:`prefix_directory_join`.
    :param name: The artifact whose value is used as the file name.
    :param version: Optional version, forwarded to :func:`prefix_directory_join`.
    :returns: The path of the artifact under ``CACHE_SUBDIRECTORY_NAME``.
    """
    return prefix_directory_join(
        ontology, CACHE_SUBDIRECTORY_NAME, name=name.value, version=version
    )


def get_relation_cache_path(
    ontology: str,
    reference: Reference,
    *,
    version: str | None = None,
) -> Path:
    """Get a relation cache path.

    :param ontology: The prefix, forwarded to :func:`prefix_directory_join`.
    :param reference: The reference whose ``curie`` plus ``.tsv`` is the file name.
    :param version: Optional version, forwarded to :func:`prefix_directory_join`.
    :returns: The path of the relation file under ``RELATION_SUBDIRECTORY_NAME``.
    """
    return prefix_directory_join(
        ontology, RELATION_SUBDIRECTORY_NAME, name=f"{reference.curie}.tsv", version=version
    )