Create the Controlled Vocabulary of the DKRZ Data Catalog
Hardcoded entries
# Controlled vocabulary (CV), seeded with its hardcoded entries.
cv = {
    # Names of the metadata fields listed as required.
    "required_metadata": ["relation", "account"],
    # The "type" values used by the hardcoded relation entries below.
    "relation_types": ["institution", "project", "model"],
    # Hardcoded relations, keyed by their identifier.
    "relation": {
        "mpi-m": {
            "type": "institution",
            "description": "Max-Planck-Institut for Meteorology",
        },
        "icon": {
            "type": "model",
            "description": "Earth System Model",
        },
        "nextgems": {
            "type": "project",
            "description": "Next Generation Earth System Models",
        },
    },
}
Add relations from users
import glob
import os
# Collect one relation name per plain file found directly in ../lake.
# Directories and any path ending in "sources" are skipped. The names are
# sorted because glob returns matches in arbitrary order, which would make
# the key order of the written CV differ from run to run.
all_lakes = sorted(
    os.path.basename(path)
    for path in glob.glob("../lake/*")
    if not path.endswith("sources") and not os.path.isdir(path)
)

# Placeholder entry for relations that have no hardcoded description.
default_relation = {
    "type": "unknown",
    "description": "unknown",
}

for relation in all_lakes:
    # Hardcoded entries win. Every new relation gets its own copy of the
    # placeholder so that editing one entry later cannot change the others.
    if relation not in cv["relation"]:
        cv["relation"][relation] = dict(default_relation)
Add accounts from /work
# Every entry name found in /work is recorded as an account, in sorted order.
# os.listdir raises FileNotFoundError when /work does not exist.
cv["account"] = os.listdir("/work")
cv["account"].sort()
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[4], line 2
1 cv["account"]=sorted(
----> 2 os.listdir("/work")
3 )
FileNotFoundError: [Errno 2] No such file or directory: '/work'
Write CV
import json
# Write the vocabulary to ../metadata_cv.json as JSON with a 4-space indent.
with open("../metadata_cv.json", "w") as cv_file:
    serialized_cv = json.dumps(cv, indent=4)
    cv_file.write(serialized_cv)