Create a catalog for /pool/data

Create a catalog for /pool/data

import intake
import os
import yaml
import fsspec
from copy import deepcopy
from tqdm import tqdm
def create_or_update(intfile, ss):
    """Write the catalog ``ss`` to ``intfile``, merging into an existing file.

    If ``intfile`` already exists, its ``sources`` mapping is updated with the
    entries of ``ss["sources"]`` (entries with the same name are overwritten)
    and the merged catalog is written back. Otherwise ``ss`` is written as is.

    Args:
        intfile: Path of the YAML catalog file to create or update.
        ss: Catalog dictionary; must provide a ``"sources"`` mapping when
            ``intfile`` already exists.
    """
    parent = os.path.dirname(intfile)
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when the path actually has a directory component.
    if parent:
        os.makedirs(parent, exist_ok=True)

    if os.path.isfile(intfile):
        with fsspec.open(intfile, "r") as f:
            # NOTE(review): full_load accepts more than plain YAML data;
            # prefer yaml.safe_load if catalog files may be untrusted.
            # An empty file loads as None, hence the fallback to a dict.
            cat = yaml.full_load(f) or {}
        # Tolerate an existing catalog with a missing or empty "sources" key.
        sources = cat.get("sources") or {}
        sources.update(ss["sources"])
        cat["sources"] = sources
    else:
        cat = ss

    with fsspec.open(intfile, "w") as f:
        yaml.dump(cat, f)
# Path layout: TRUNK is the directory scanned for catalogs, poolcattrunk the
# directory (relative to the working directory) that receives the results.
osp = os.path
TRUNK = "/pool/data/"
poolcattrunk = "../pool"
poolcat = osp.join(poolcattrunk, "main.yaml")

# Skeleton of one catalog source entry; it is deep-copied per discovered
# catalog before being stored in the collected sources.
template = dict(
    args=dict(path="{{CATALOG_DIR}}/dkrz/main.yaml"),
    description="This catalog contains datasets for EERIE stored on DKRZ",
    driver="yaml_file_cat",
)

# Accumulates the source entries that end up in the pool main catalog.
poolcat_candidate = {"sources": {}}
# Scan every entry directly below TRUNK for a main.yaml that intake can open.
# Each usable catalog is copied into the repository and registered as a source
# of the pool main catalog.
for candidate_dir in tqdm(os.listdir(TRUNK)):
    print(f"Check {candidate_dir} for a main.yaml")
    candidate_file = osp.join(TRUNK, candidate_dir, "main.yaml")
    if not os.path.isfile(candidate_file):
        continue

    print(f"Test {candidate_file}")
    try:
        intake.open_catalog(candidate_file)
        print(f"Successfully opened {candidate_file}")
        repo_catalog_copy = osp.join(poolcattrunk, candidate_dir, "main.yaml")
        # Make sure the target directory exists before copying into it.
        os.makedirs(osp.dirname(repo_catalog_copy), exist_ok=True)
        fsspec.filesystem("file").cp(candidate_file, repo_catalog_copy)
    except Exception as e:
        # Best effort: one broken catalog must not abort the whole scan.
        print(f"Could not use catalog because of \n{e}")
        continue

    # Register the source only after the copy succeeded, so the main catalog
    # never references a file that was not written.
    entry = deepcopy(template)
    # str.replace returns a new string; assign it back so the entry points at
    # this catalog's directory instead of the template's "dkrz" placeholder.
    entry["args"]["path"] = entry["args"]["path"].replace(
        "/dkrz/", f"/{candidate_dir}/"
    )
    poolcat_candidate["sources"][candidate_dir] = entry

create_or_update(poolcat, poolcat_candidate)
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[4], line 8
      4 poolcat=osp.join(poolcattrunk,"main.yaml")
      5 poolcat_candidate=dict(sources=dict())
      6 for candidate_dir in tqdm([
      7     a 
----> 8     for a in os.listdir(TRUNK)
      9 #    if os.path.islink(osp.join(TRUNK,a))
     10 ]):
     11     print(f"Check {candidate_dir} for a main.yaml")
     12     candidate_file=osp.join(TRUNK,candidate_dir,"main.yaml")

FileNotFoundError: [Errno 2] No such file or directory: '/pool/data/'
# Open the pool main catalog at `poolcat` with intake (presumably a sanity
# check that the file written above is readable).
intake.open_catalog(poolcat)
!ls /home/k/k204210/dkrz-data-catalog/scripts/../pool/CMIP6/