{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "21a18f81-2f49-449f-b418-57379fd64097",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Create a catalog for /pool/data\n",
    "\n",
    "Scan every directory below `/pool/data` for a `main.yaml`, check that intake can open it, copy it to `../pool/<directory>/main.yaml` and register it as a source in `../pool/main.yaml`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "309bbc26-9fc2-41bf-8cac-f9a2be14870a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import intake\n",
    "import os\n",
    "import yaml\n",
    "import fsspec\n",
    "from copy import deepcopy\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3513ad0-f982-4a73-a306-1d7985a26746",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def create_or_update(intfile, ss):\n",
    "    \"\"\"Write the catalog dict ``ss`` to the YAML file ``intfile``.\n",
    "\n",
    "    If ``intfile`` already exists, the entries of ``ss[\"sources\"]`` are merged\n",
    "    into its ``sources`` mapping (entries with the same name are replaced) and\n",
    "    the file is rewritten; otherwise ``ss`` is dumped as a new file.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    intfile : str\n",
    "        Path of the catalog file to create or update.\n",
    "    ss : dict\n",
    "        Catalog dictionary with a ``\"sources\"`` mapping.\n",
    "    \"\"\"\n",
    "    parent_dir = os.path.dirname(intfile)\n",
    "    if parent_dir:\n",
    "        # os.makedirs(\"\") raises, so skip it for a bare file name.\n",
    "        os.makedirs(parent_dir, exist_ok=True)\n",
    "    if os.path.isfile(intfile):\n",
    "        with fsspec.open(intfile, \"r\") as f:\n",
    "            # NOTE(review): full_load is not meant for untrusted input;\n",
    "            # consider yaml.safe_load if the file may come from elsewhere.\n",
    "            cat = yaml.full_load(f)\n",
    "        # An empty file loads as None and \"sources\" may be missing or empty.\n",
    "        cat = cat or {}\n",
    "        if not cat.get(\"sources\"):\n",
    "            cat[\"sources\"] = {}\n",
    "        cat[\"sources\"].update(ss[\"sources\"])\n",
    "        with fsspec.open(intfile, \"w\") as f:\n",
    "            yaml.dump(cat, f)\n",
    "    else:\n",
    "        with fsspec.open(intfile, \"w\") as f:\n",
    "            yaml.dump(ss, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e2e1b32c-ad7b-4a37-804e-1a5a40621b5f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Entry template for one sub-catalog; the \"/dkrz/\" path segment is replaced\n",
    "# by the name of each directory that gets registered.\n",
    "# NOTE(review): the description mentions EERIE/DKRZ but is reused unchanged\n",
    "# for every registered directory - confirm that this is intended.\n",
    "template={\n",
    "    \"args\":{\n",
    "        \"path\": \"{{CATALOG_DIR}}/dkrz/main.yaml\"\n",
    "    },\n",
    "    \"description\": \"This catalog contains datasets for EERIE stored on DKRZ\",\n",
    "    \"driver\": \"yaml_file_cat\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6c65aab-7870-4177-ad74-aaf055907bac",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "TRUNK=\"/pool/data/\"\n",
    "osp=os.path\n",
    "poolcattrunk=\"../pool\"\n",
    "poolcat=osp.join(poolcattrunk,\"main.yaml\")\n",
    "poolcat_candidate=dict(sources=dict())\n",
    "# To restrict the scan to symlinks, filter the listing with\n",
    "# os.path.islink(osp.join(TRUNK, name)).\n",
    "for candidate_dir in tqdm(os.listdir(TRUNK)):\n",
    "    print(f\"Check {candidate_dir} for a main.yaml\")\n",
    "    candidate_file=osp.join(TRUNK,candidate_dir,\"main.yaml\")\n",
    "    if not os.path.isfile(candidate_file):\n",
    "        continue\n",
    "    print(f\"Test {candidate_file}\")\n",
    "    try:\n",
    "        intake.open_catalog(candidate_file)\n",
    "        print(f\"Successfully opened {candidate_file}\")\n",
    "        # Copy first, so a source is only registered once its file is in place.\n",
    "        repo_catalog_copy=osp.join(poolcattrunk,candidate_dir,\"main.yaml\")\n",
    "        # By default the local filesystem copy does not create missing parent directories.\n",
    "        os.makedirs(osp.dirname(repo_catalog_copy),exist_ok=True)\n",
    "        fsspec.filesystem(\"file\").cp(candidate_file,repo_catalog_copy)\n",
    "        entry=deepcopy(template)\n",
    "        # str.replace returns a new string, so the result must be stored.\n",
    "        entry[\"args\"][\"path\"]=entry[\"args\"][\"path\"].replace(\"/dkrz/\",f\"/{candidate_dir}/\")\n",
    "        poolcat_candidate[\"sources\"][candidate_dir]=entry\n",
    "    except Exception as e:\n",
    "        # Best effort: report the problem and continue with the next directory.\n",
    "        print(f\"Could not use catalog because of \\n{e}\")\n",
    "create_or_update(poolcat, poolcat_candidate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c65ca3f3-f7d1-4185-81b8-c988ff7d268f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "intake.open_catalog(poolcat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a4932fc-ddc5-4a95-9cae-81043f71fd84",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!ls {poolcattrunk}/CMIP6/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e9d5f53-aba5-4b39-8341-c518a0a620c1",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dkrzcatalog",
   "language": "python",
   "name": "dkrzcatalog"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}