"""Utilities for building paths."""
from __future__ import annotations
import enum
import json
import logging
from pathlib import Path
from typing import TYPE_CHECKING, Any
import pandas as pd
from curies import Reference
from pystow import VersionHint
from pystow.utils.download import DownloadKwargs
from typing_extensions import Unpack
from ..constants import CACHE_SUBDIRECTORY_NAME, RAW_MODULE, RELATION_SUBDIRECTORY_NAME
if TYPE_CHECKING:
from pandas._typing import DtypeArg
# Public API of this module: the names exported by a star-import.
# Every entry below is defined further down in this file.
__all__ = [
"CacheArtifact",
"ensure_df",
"ensure_json",
"ensure_path",
"get_cache_path",
"get_relation_cache_path",
"join_path",
"prefix_directory_join",
]
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
def prefix_directory_join(
    prefix: str,
    *parts: str,
    name: str | None = None,
    version: VersionHint = None,
    ensure_exists: bool = True,
) -> Path:
    """Build a path inside the storage module that belongs to a prefix.

    :param prefix: Key used to select the submodule of ``RAW_MODULE``.
    :param parts: Path components forwarded positionally to the submodule's ``join``.
    :param name: Optional file name, forwarded to ``join``.
    :param version: Optional version hint, forwarded to ``join``.
    :param ensure_exists: Forwarded to ``join`` unchanged; presumably controls
        whether missing directories get created (confirm against pystow).
    :returns: The path returned by the submodule's ``join``.
    """
    prefix_module = RAW_MODULE.module(prefix)
    return prefix_module.join(
        *parts,
        name=name,
        version=version,
        ensure_exists=ensure_exists,
    )
def join_path(
    prefix: str,
    *parts: str,
    version: VersionHint = None,
    name: str | None = None,
) -> Path:
    """Build a path inside the storage module that belongs to a prefix.

    Nothing is downloaded here: the function takes no URL and only delegates
    to the submodule's ``join``. Use :func:`ensure_path` to fetch a file.

    :param prefix: Key used to select the submodule of ``RAW_MODULE``.
    :param parts: Path components forwarded positionally to ``join``.
    :param version: Optional version hint, forwarded to ``join``.
    :param name: Optional file name, forwarded to ``join``.
    :returns: The path returned by the submodule's ``join``.
    """
    return RAW_MODULE.module(prefix).join(*parts, name=name, version=version)
def ensure_path(
    prefix: str,
    *parts: str,
    url: str,
    version: VersionHint = None,
    name: str | None = None,
    force: bool = False,
    **download_kwargs: Unpack[DownloadKwargs],
) -> Path:
    """Fetch a file into the prefix's storage module unless it is already there.

    :param prefix: Key used to select the submodule of ``RAW_MODULE``.
    :param parts: Path components forwarded positionally to ``ensure``.
    :param url: Location of the remote file, forwarded to ``ensure``.
    :param version: Optional version hint, forwarded to ``ensure``.
    :param name: Optional file name, forwarded to ``ensure``.
    :param force: Forwarded to ``ensure`` unchanged; presumably requests a
        re-download even when a local copy exists (confirm against pystow).
    :param download_kwargs: Extra keyword arguments, collected into one
        dictionary and handed to ``ensure`` as its ``download_kwargs`` argument.
    :returns: The local path returned by ``ensure``.
    """
    prefix_module = RAW_MODULE.module(prefix)
    local_path = prefix_module.ensure(
        *parts,
        url=url,
        name=name,
        version=version,
        force=force,
        download_kwargs=download_kwargs,
    )
    return local_path
def ensure_df(
    prefix: str,
    *parts: str,
    url: str,
    version: VersionHint = None,
    name: str | None = None,
    force: bool = False,
    sep: str = "\t",
    dtype: DtypeArg | None = str,
    download_kwargs: DownloadKwargs | None = None,
    **kwargs: Any,
) -> pd.DataFrame:
    """Fetch a delimited file via :func:`ensure_path` and load it with pandas.

    :param prefix: Forwarded to :func:`ensure_path`.
    :param parts: Forwarded positionally to :func:`ensure_path`.
    :param url: Forwarded to :func:`ensure_path`.
    :param version: Forwarded to :func:`ensure_path`.
    :param name: Forwarded to :func:`ensure_path`.
    :param force: Forwarded to :func:`ensure_path`.
    :param sep: Column separator given to :func:`pandas.read_csv`; tab by default.
    :param dtype: Column dtype given to :func:`pandas.read_csv`; ``str`` by default.
    :param download_kwargs: Optional download options, unpacked as keyword
        arguments for :func:`ensure_path`; ``None`` means no extra options.
    :param kwargs: Any remaining keyword arguments go to :func:`pandas.read_csv`.
    :returns: The parsed dataframe.
    """
    # A missing (or empty) mapping contributes no extra keyword arguments.
    extra_download_options = download_kwargs or {}
    local_path = ensure_path(
        prefix,
        *parts,
        url=url,
        version=version,
        name=name,
        force=force,
        **extra_download_options,
    )
    frame = pd.read_csv(local_path, sep=sep, dtype=dtype, **kwargs)
    return frame
def ensure_json(
    prefix: str,
    *parts: str,
    url: str,
    version: VersionHint = None,
    name: str | None = None,
    force: bool = False,
    **kwargs: Unpack[DownloadKwargs],
) -> Any:
    """Fetch a file via :func:`ensure_path` and parse it as JSON.

    :param prefix: Forwarded to :func:`ensure_path`.
    :param parts: Forwarded positionally to :func:`ensure_path`.
    :param url: Forwarded to :func:`ensure_path`.
    :param version: Forwarded to :func:`ensure_path`.
    :param name: Forwarded to :func:`ensure_path`.
    :param force: Forwarded to :func:`ensure_path`.
    :param kwargs: Download options, forwarded as keyword arguments to
        :func:`ensure_path`.
    :returns: The object produced by :func:`json.load`.
    """
    _path = ensure_path(
        prefix,
        *parts,
        url=url,
        version=version,
        name=name,
        # Previously dropped here, so ``force=True`` had no effect for JSON files.
        force=force,
        **kwargs,
    )
    # Binary mode lets ``json.load`` detect UTF-8/16/32 itself instead of
    # depending on the platform's locale-default text encoding.
    with _path.open("rb") as file:
        return json.load(file)
@enum.unique
class CacheArtifact(enum.Enum):
    """An enumeration of cache artifacts, each valued by its file name.

    The value of a member is the file name passed to :func:`get_cache_path`
    when locating that artifact. ``enum.unique`` makes a duplicated file name
    an error at import time rather than a silent alias of an earlier member.
    """

    names = "names.tsv.gz"
    definitions = "definitions.tsv.gz"
    species = "species.tsv.gz"
    mappings = "mappings.sssom.tsv.gz"
    relations = "relations.tsv.gz"
    alts = "alt_ids.tsv.gz"
    typedefs = "typedefs.tsv.gz"
    literal_mappings = "literal_mappings.tsv.gz"
    references = "references.tsv.gz"
    obsoletes = "obsolete.tsv.gz"
    literal_properties = "literal_properties.tsv.gz"
    object_properties = "object_properties.tsv.gz"
    nodes = "nodes.tsv.gz"
    edges = "edges.tsv.gz"
    prefixes = "prefixes.json"
    metadata = "metadata.json"
    embeddings = "embeddings.tsv.gz"
def get_cache_path(
    ontology: str,
    name: CacheArtifact,
    *,
    version: str | None = None,
) -> Path:
    """Locate the file of a cache artifact for an ontology.

    :param ontology: Prefix handed to :func:`prefix_directory_join`.
    :param name: The artifact whose enum value is used as the file name.
    :param version: Optional version, forwarded to :func:`prefix_directory_join`.
    :returns: The path under the ``CACHE_SUBDIRECTORY_NAME`` subdirectory.
    """
    artifact_filename = name.value
    return prefix_directory_join(
        ontology,
        CACHE_SUBDIRECTORY_NAME,
        name=artifact_filename,
        version=version,
    )
def get_relation_cache_path(
    ontology: str,
    reference: Reference,
    *,
    version: str | None = None,
) -> Path:
    """Locate the cache file of one relation for an ontology.

    :param ontology: Prefix handed to :func:`prefix_directory_join`.
    :param reference: The relation; its ``curie`` plus ``.tsv`` is the file name.
    :param version: Optional version, forwarded to :func:`prefix_directory_join`.
    :returns: The path under the ``RELATION_SUBDIRECTORY_NAME`` subdirectory.
    """
    relation_filename = f"{reference.curie}.tsv"
    return prefix_directory_join(
        ontology,
        RELATION_SUBDIRECTORY_NAME,
        name=relation_filename,
        version=version,
    )