Create a catalog for /pool/data
import intake
import os
import yaml
import fsspec
from copy import deepcopy
from tqdm import tqdm
def create_or_update(intfile, ss):
    """Write the catalog ``ss`` to ``intfile``, merging into an existing file.

    If ``intfile`` already exists, the entries of ``ss["sources"]`` are merged
    into its ``sources`` mapping (same-named entries are overwritten) and the
    result is written back. Otherwise ``ss`` is dumped as a new file. Missing
    parent directories of ``intfile`` are created first.

    Args:
        intfile: Path of the YAML catalog file to create or update.
        ss: Catalog dictionary. Must provide a ``"sources"`` mapping when
            ``intfile`` already exists.

    Raises:
        KeyError: If ``intfile`` exists and ``ss`` has no ``"sources"`` key.
    """
    parent = os.path.dirname(intfile)
    # os.makedirs("") raises FileNotFoundError, so skip it for bare file names.
    if parent:
        os.makedirs(parent, exist_ok=True)

    if not os.path.isfile(intfile):
        with fsspec.open(intfile, "w") as f:
            yaml.dump(ss, f)
        return

    with fsspec.open(intfile, "r") as f:
        # NOTE(review): full_load is kept for compatibility; the catalog is a
        # locally maintained file. Consider yaml.safe_load if that changes.
        cat = yaml.full_load(f)

    # An empty file loads as None, and an existing catalog may have no
    # (or an empty) "sources" section; normalise both before merging.
    if cat is None:
        cat = {}
    if not cat.get("sources"):
        cat["sources"] = {}
    cat["sources"].update(ss["sources"])

    with fsspec.open(intfile, "w") as f:
        yaml.dump(cat, f)
# Skeleton of one catalog entry for the "yaml_file_cat" driver. Every accepted
# directory receives its own deep copy of this mapping.
template = dict(
    args=dict(path="{{CATALOG_DIR}}/dkrz/main.yaml"),
    description="This catalog contains datasets for EERIE stored on DKRZ",
    driver="yaml_file_cat",
)
# Root directory that is scanned for sub-directories providing a main.yaml.
TRUNK = "/pool/data/"
osp = os.path
# Directory (relative to the working directory) that receives the catalog copies.
poolcattrunk = "../pool"
# Aggregating catalog that references every accepted sub-catalog.
poolcat = osp.join(poolcattrunk, "main.yaml")
poolcat_candidate = dict(sources=dict())

# os.listdir already returns a list, so tqdm can show a total without an extra
# comprehension. (A symlink-only filter via os.path.islink was considered here
# earlier but is not active.)
for candidate_dir in tqdm(os.listdir(TRUNK)):
    print(f"Check {candidate_dir} for a main.yaml")
    candidate_file = osp.join(TRUNK, candidate_dir, "main.yaml")
    if not os.path.isfile(candidate_file):
        continue

    print(f"Test {candidate_file}")
    try:
        # Only catalogs that intake can actually open are taken over.
        intake.open_catalog(candidate_file)
        print(f"Successfully opened {candidate_file}")

        # Copy first: the local fsspec filesystem does not create missing
        # parent directories by default, so make sure the target exists.
        repo_catalog_copy = osp.join(poolcattrunk, candidate_dir, "main.yaml")
        os.makedirs(osp.dirname(repo_catalog_copy), exist_ok=True)
        fsspec.filesystem("file").cp(candidate_file, repo_catalog_copy)

        # str.replace returns a new string; the result must be assigned back,
        # otherwise every entry keeps pointing at the "/dkrz/" template path.
        entry = deepcopy(template)
        entry["args"]["path"] = entry["args"]["path"].replace(
            "/dkrz/", f"/{candidate_dir}/"
        )
        # Register the entry only after the copy succeeded, so the aggregated
        # catalog never references a file that was not written.
        poolcat_candidate["sources"][candidate_dir] = entry
    except Exception as e:
        # Best effort: report the broken catalog and continue with the next one.
        print(f"Could not use catalog because of \n{e}")

create_or_update(poolcat, poolcat_candidate)
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[4], line 8
4 poolcat=osp.join(poolcattrunk,"main.yaml")
5 poolcat_candidate=dict(sources=dict())
6 for candidate_dir in tqdm([
7 a
----> 8 for a in os.listdir(TRUNK)
9 # if os.path.islink(osp.join(TRUNK,a))
10 ]):
11 print(f"Check {candidate_dir} for a main.yaml")
12 candidate_file=osp.join(TRUNK,candidate_dir,"main.yaml")
FileNotFoundError: [Errno 2] No such file or directory: '/pool/data/'
# Sanity check: open the aggregated catalog at the path that was passed to
# create_or_update above.
intake.open_catalog(poolcat)
# IPython shell escape: list the CMIP6 directory below the pool folder
# (presumably to confirm that its main.yaml copy exists -- path is specific
# to one user's checkout).
!ls /home/k/k204210/dkrz-data-catalog/scripts/../pool/CMIP6/