Create the Controlled Vocabulary of the DKRZ Data Catalog

Hardcoded entries

# Seed the controlled vocabulary (CV) with the hand-maintained entries.
# Everything is declared in one literal so the full structure is visible at
# a glance; key order matches the order in which the entries are serialized.
cv = {
    "required_metadata": ["relation", "account"],
    "relation_types": ["institution", "project", "model"],
    # Each relation is keyed by its short name and carries a "type" and a
    # free-text "description".
    "relation": {
        "mpi-m": {
            "type": "institution",
            "description": "Max-Planck-Institut for Meteorology",
        },
        "icon": {
            "type": "model",
            "description": "Earth System Model",
        },
        "nextgems": {
            "type": "project",
            "description": "Next Generation Earth System Models",
        },
    },
}

Add relations from users

import glob
import os
# Collect the names of all plain files directly inside ../lake, skipping
# directories and any path that ends in "sources". os.path.basename is used
# instead of splitting on "/" so the name is extracted correctly whatever
# separator glob returns. Sorting makes the order of the generated entries
# independent of the file-system listing order.
all_lakes = sorted(
    os.path.basename(lake_path)
    for lake_path in glob.glob("../lake/*")
    if not lake_path.endswith("sources") and not os.path.isdir(lake_path)
)
# Placeholder used for relations that have no hardcoded entry in the CV.
default_relation = {
    "type": "unknown",
    "description": "unknown",
}
for relation in all_lakes:
    # Entries that already exist are left untouched. Every new relation gets
    # its own copy of the placeholder; sharing one dict object between all of
    # them would make a later edit of one entry change every other entry too.
    cv["relation"].setdefault(relation, dict(default_relation))

Add accounts from /work

# Every entry directly under /work is used as an account name.
try:
    cv["account"] = sorted(os.listdir("/work"))
except FileNotFoundError:
    # os.listdir raises FileNotFoundError when /work does not exist. Fall
    # back to an empty list so that the "account" key, which is listed in
    # cv["required_metadata"], is always present, and warn so the missing
    # directory does not go unnoticed.
    import warnings

    warnings.warn(
        "Directory '/work' not found; cv['account'] is set to an empty list."
    )
    cv["account"] = []
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[4], line 2
      1 cv["account"]=sorted(
----> 2     os.listdir("/work")
      3 )

FileNotFoundError: [Errno 2] No such file or directory: '/work'

Write the CV

import json
# Serialize the complete vocabulary as JSON indented by four spaces and store
# it next to the notebook directory as metadata_cv.json.
with open("../metadata_cv.json", "w") as cv_file:
    serialized_cv = json.dumps(cv, indent=4)
    cv_file.write(serialized_cv)