{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6cb8906f-d211-4bfe-8f2f-809879ae9408",
   "metadata": {},
   "source": [
    "# Create the Controlled Vocabulary of the DKRZ Data Catalog\n",
    "\n",
    "Builds the controlled vocabulary (CV) from hardcoded entries, the non-directory entries found under `../lake`, and the names listed under `/work`, then writes it to `../metadata_cv.json`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8d708b3-96c5-496d-aacf-b12fdba78d9d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import glob\n",
    "import json\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1b07d9f-e1cf-435e-86f1-1eeabd4dc830",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Locations read from and written to by this notebook.\n",
    "LAKE_GLOB = \"../lake/*\"\n",
    "ACCOUNT_ROOT = \"/work\"\n",
    "CV_PATH = \"../metadata_cv.json\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "82ce7dbe-451d-467d-8cc2-84efb69b253e",
   "metadata": {},
   "source": [
    "## Hardcoded entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9b12066-5de2-40ca-bc07-8e8a00a1bdfd",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Hardcoded part of the vocabulary; later cells add to cv[\"relation\"]\n",
    "# and set cv[\"account\"].\n",
    "cv = {\n",
    "    \"required_metadata\": [\"relation\", \"account\"],\n",
    "    \"relation_types\": [\"institution\", \"project\", \"model\"],\n",
    "    \"relation\": {\n",
    "        \"mpi-m\": {\n",
    "            \"type\": \"institution\",\n",
    "            \"description\": \"Max-Planck-Institut for Meteorology\",\n",
    "        },\n",
    "        \"icon\": {\n",
    "            \"type\": \"model\",\n",
    "            \"description\": \"Earth System Model\",\n",
    "        },\n",
    "        \"nextgems\": {\n",
    "            \"type\": \"project\",\n",
    "            \"description\": \"Next Generation Earth System Models\",\n",
    "        },\n",
    "    },\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ffa708ca-d4ff-4bcd-a795-ddd19667aaa3",
   "metadata": {},
   "source": [
    "## Add relations from users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a136684-9e4d-43d0-845b-8dce5ac09e18",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Names of the non-directory entries matched by LAKE_GLOB, skipping any\n",
    "# path that ends with \"sources\". Sorted so the order of the entries in\n",
    "# the written CV does not depend on the order in which glob returns paths.\n",
    "all_lakes = sorted(\n",
    "    os.path.basename(path)\n",
    "    for path in glob.glob(LAKE_GLOB)\n",
    "    if not path.endswith(\"sources\") and not os.path.isdir(path)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9121392-49a6-436d-aa0d-e8abec1a2be3",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Placeholder used for every lake entry that has no hardcoded relation.\n",
    "default_relation = {\n",
    "    \"type\": \"unknown\",\n",
    "    \"description\": \"unknown\",\n",
    "}\n",
    "for relation in all_lakes:\n",
    "    if relation not in cv[\"relation\"]:\n",
    "        # Each entry gets its own copy, so editing one of them later\n",
    "        # cannot silently change all the others.\n",
    "        cv[\"relation\"][relation] = dict(default_relation)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "edfacec4-8bda-4608-9689-8e77ef03a266",
   "metadata": {},
   "source": [
    "## Add accounts from `/work`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "776c4575-f01a-4432-9cac-a655a01bfa70",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Every name listed directly under ACCOUNT_ROOT becomes an account.\n",
    "cv[\"account\"] = sorted(os.listdir(ACCOUNT_ROOT))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7b38edd5-36ab-47ff-a8df-8a289a8ad472",
   "metadata": {},
   "source": [
    "## Write CV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a52e5904-b7bf-43ed-b27d-31c9628e59f8",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "with open(CV_PATH, \"w\") as f:\n",
    "    json.dump(cv, f, indent=4)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dkrzcatalog",
   "language": "python",
   "name": "dkrzcatalog"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}