diff --git a/ntrfc/database/case_templates/case_templates.py b/ntrfc/database/case_templates/case_templates.py
index 3b2bb9edb3b915c5adfb682f3a5102562e67d8e3..f7cb2b9d0a5abcc51a9d9a9940df363e5c38595e 100644
--- a/ntrfc/database/case_templates/case_templates.py
+++ b/ntrfc/database/case_templates/case_templates.py
@@ -1,16 +1,151 @@
 import importlib_resources
 import os
+import re
+from functools import reduce
+import shutil
+import warnings
 
-from ntrfc.utils.filehandling.datafiles import get_filelist_fromdir
+from ntrfc.utils.filehandling.datafiles import get_filelist_fromdir, inplace_change
+from ntrfc.utils.dictionaries.dict_utils import set_in_dict, nested_dict_pairs_iterator
+
+
+def get_directory_structure2(rootdir):
+    """
+    Create a nested dictionary that represents the folder structure of rootdir.
+
+    Directories map to dictionaries, files map to None. The returned
+    dictionary holds the content of rootdir itself.
+    """
+    # test method
+    tree = {}
+    # os.fspath turns path-like objects into the str that rstrip/rfind need
+    rootdir = os.fspath(rootdir).rstrip(os.sep)
+    start = rootdir.rfind(os.sep) + 1
+    for path, dirs, files in os.walk(rootdir):
+        folders = path[start:].split(os.sep)
+        subdir = dict.fromkeys(files)
+        parent = reduce(dict.get, folders[:-1], tree)
+        parent[folders[-1]] = subdir
+    return tree[os.path.basename(rootdir)]
+
+
+def find_vars(path_to_sim, sign):
+    """
+    Collect all placeholders "<sign name sign>" in the files below path_to_sim.
+
+    :param path_to_sim: path-like - root directory that is searched
+    :param sign: str - marker of the placeholder type ("PARAM" or "OPTION")
+    :return: dict - maps every placeholder name to the list of files using it
+    """
+    case_structure = get_directory_structure2(path_to_sim)
+    all_files = [i[:-1] for i in nested_dict_pairs_iterator(case_structure)]
+
+    sim_variables = {}
+    for file in all_files:
+        filepath = os.path.join(path_to_sim, *file)
+        filevars = find_variables_infile(filepath, sign)
+        for k, v in filevars.items():
+            # v is a list of files, extend keeps the collected list flat
+            sim_variables.setdefault(k, []).extend(v)
+    return sim_variables
+
+
+def find_variables_infile(file, sign):
+    """
+    Find all placeholders "<sign name sign>" in a single file.
+
+    :param file: path of the file that is searched
+    :param sign: str - marker of the placeholder type ("PARAM" or "OPTION")
+    :return: dict - maps every placeholder name to [file]
+    :raises AssertionError: if sign is left in a line outside of a valid placeholder
+    """
+    varsignature = r"<PLACEHOLDER [a-z]{3,}(_{1,1}[a-z]{3,}){,} PLACEHOLDER>".replace("PLACEHOLDER", sign)
+    siglim = (len(f"< {sign}"), -(len(f" {sign}>")))
+    variables = {}
+    with open(file, "r") as fhandle:
+        for line in fhandle:
+            lookaround = True
+            while lookaround:
+                lookup_var = re.search(varsignature, line)
+                if not lookup_var:
+                    lookaround = False
+                    assert sign not in line, f"parameter is not defined correct \n file: {file}\n line: {line}"
+                else:
+                    span = lookup_var.span()
+                    parameter = line[span[0] + siglim[0]:span[1] + siglim[1]]
+                    # update
+                    if parameter not in variables:
+                        variables[parameter] = []
+                    if file not in variables[parameter]:
+                        variables[parameter].append(file)
+                    # remove the match, the next search then finds the next placeholder
+                    match = line[span[0]:span[1]]
+                    line = line.replace(match, "")
+    return variables
+
+
+def deploy(deply_sources, deploy_targets, deploy_params, deploy_options):
+    """
+    Copy the template files to their targets and replace all placeholders.
+
+    :param deply_sources: list of template files
+    :param deploy_targets: list of target files, same order as deply_sources
+    :param deploy_params: dict - value for every "<PARAM name PARAM>"
+    :param deploy_options: dict - value for every "<OPTION name OPTION>"
+    """
+    for source, target in zip(deply_sources, deploy_targets):
+        target_dir = os.path.dirname(target)
+        if target_dir:
+            # os.makedirs("") raises, a target in the working directory needs no folder
+            os.makedirs(target_dir, exist_ok=True)
+        shutil.copyfile(source, target)
+        for parameter in deploy_params:
+            inplace_change(target, f"<PARAM {parameter} PARAM>", str(deploy_params[parameter]))
+        for option in deploy_options:
+            inplace_change(target, f"<OPTION {option} OPTION>", str(deploy_options[option]))
 
 
 class case_template:
+    """A case template of the ntrfc database and the placeholders of its files."""
+
+    psign = "PARAM"
+    osign = "OPTION"
+
     def __init__(self, name):
         self.name = name
         self.path = importlib_resources.files("ntrfc") / f"database/case_templates/{name}"
         self.schema = importlib_resources.files("ntrfc") / f"database/case_templates/{name}.schema.yaml"
         self.files = [os.path.relpath(fpath, self.path) for fpath in get_filelist_fromdir(self.path)]
+        # placeholder name -> list of files that contain the placeholder
+        self.params = find_vars(self.path, self.psign)
+        self.params_set = {}
+        self.options = find_vars(self.path, self.osign)
+        self.options_set = {}
+
+    def set_params_options(self, params_set, options_set):
+        """Store the values that are used for the parameters and options."""
+        self.params_set = params_set
+        self.options_set = options_set
+
+    def sanity_check(self):
+        """
+        Check that every parameter and option of the template has a value.
+
+        A warning is emitted for every placeholder without a value.
+
+        :return: bool - True if no value is missing
+        """
+        sanity = True
+        for p in self.params:
+            if p not in self.params_set:
+                sanity = False
+                warnings.warn(f"{p} not set")
+        for o in self.options:
+            if o not in self.options_set:
+                sanity = False
+                warnings.warn(f"{o} not set")
+        return sanity
 
 
 """
 avail_templates is used to create a dictionary of with case_templates-Objects
diff --git a/ntrfc/preprocessing/case_creation/__init__.py b/ntrfc/preprocessing/case_creation/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/ntrfc/preprocessing/case_creation/create_case.py b/ntrfc/preprocessing/case_creation/create_case.py
deleted file mode 100644
index b0886773be8dbf43af299d645c66403a6687bde9..0000000000000000000000000000000000000000
--- a/ntrfc/preprocessing/case_creation/create_case.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import copy
-import os
-import re
-import shutil
-
-from ntrfc.utils.filehandling.datafiles import inplace_change, get_directory_structure
-from ntrfc.utils.dictionaries.dict_utils import nested_dict_pairs_iterator, set_in_dict, merge
-from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES
-
-
-def search_paras(case_structure, line, pair, siglim, varsignature, sign):
-    """
-
-    """
-    # todo docstring and test method
-    lookforvar = True
-    while (lookforvar):
-        lookup_var = re.search(varsignature, line)
-        if not lookup_var:
-            lookforvar = False
-            filename = os.path.join(*pair[:-1])
-            assert sign not in line, f"parameter is not defined correct \n file:
{filename}\n line: {line}" - else: - span = lookup_var.span() - parameter = line[span[0] + siglim[0]:span[1] + siglim[1]] - # update - set_in_dict(case_structure, list(pair[:-1]) + [parameter], sign) - match = line[span[0]:span[1]] - line = line.replace(match, "") - return case_structure - - -def settings_sanity(case_structure, settings_dict): - """ - - : params: case_structure dict - """ - - necessarities = list(nested_dict_pairs_iterator(case_structure)) - necessarity_vars = [] - for item in necessarities: - if item[-1] == "PARAM" or item[-1] == "OPTION": - necessarity_vars.append(item[-2]) - - defined_variables = list(settings_dict.keys()) - - defined = [] - undefined = [] - unused = [] - used = [] - for variable in necessarity_vars: - if variable in defined_variables: - defined.append(variable) - else: - undefined.append(variable) - for variable in defined_variables: - if variable not in necessarity_vars: - unused.append(variable) - else: - used.append(variable) - return defined, undefined, used, unused - - -def create_case(input_files, output_files, template_name, simparams): - """ - :param input_files: list of template-files - :param output_files: list of outputfiles (same as input) - :param template_name: str - template-name - :param simparams: dict - dict-settings - passed via filenames - :return: - """ - - found = template_name in CASE_TEMPLATES.keys() - assert found, "template unknown. 
check ntrfc.database.casetemplates directory" - template = CASE_TEMPLATES[template_name] - case_structure = get_directory_structure(template.path) - - param_sign = "PARAM" - option_sign = "OPTION" - - parameters = find_vars_opts(case_structure[template.name], template.path, param_sign) - options = find_vars_opts(case_structure[template.name], template.path, option_sign) - case_settings = merge(parameters, options) - - allparams = merge(parameters,options) - defined, undefined, used, unused = settings_sanity(case_settings, simparams) - print("found ", str(len(defined)), " defined parameters") - print("found ", str(len(undefined)), " undefined parameters") - print("used ", str(len(used)), " parameters") - print("unused ", str(len(unused)), " parameters") - - assert len(undefined) == 0, f"undefined parameters: {undefined}" - assert len(unused) == 0, f"unused parameters: {unused}" - - necessarities = list(nested_dict_pairs_iterator(case_settings)) - paramtypes = {} - for item in necessarities: - if item[-1] == "PARAM": - paramtypes[item[-2]]="PARAM" - elif item[-1] == "OPTION": - paramtypes[item[-2]] = "OPTION" - - for templatefile, simfile in zip(input_files, output_files): - shutil.copyfile(templatefile, simfile) - for parameter in used: - sign = paramtypes[parameter] - inplace_change(simfile, f"<{sign} {parameter} {sign}>", str(simparams[parameter])) - - -def find_vars_opts(case_structure, path_to_sim, sign): - """ - : param case_structure: dict - case-structure. can carry parameters - : param sign: str - sign of a parameter (Velocity -> U etc.) - : param all_pairs: dict - ? - : param path_to_sim: path - path-like object - return : ? 
- """ - # allowing names like JOB_NUMBERS, only capital letters and underlines - no digits, no whitespaces - datadict = copy.deepcopy(case_structure) - all_pairs = list(nested_dict_pairs_iterator(case_structure)) - varsignature = r"<PLACEHOLDER [a-z]{3,}(_{1,1}[a-z]{3,}){,} PLACEHOLDER>".replace("PLACEHOLDER", sign) - # int - # float - # string - # todo move into param-module - siglim = (len(f"< {sign}"), -(len(f" {sign}>"))) - - for pair in all_pairs: - # if os.path.isfile(os.path.join(path_to_sim,*pair)): - set_in_dict(datadict, pair[:-1], {}) - filepath = os.path.join(*pair[:-1]) - with open(os.path.join(path_to_sim, filepath), "r") as fhandle: - for line in fhandle.readlines(): - datadict = search_paras(datadict, line, pair, siglim, varsignature, sign) - return datadict diff --git a/ntrfc/utils/filehandling/datafiles.py b/ntrfc/utils/filehandling/datafiles.py index 8e58f86d4bab06877b35c6dd771850c40d14111a..86d6e352d0e4a7d1d699726f75c51d6dddb37190 100644 --- a/ntrfc/utils/filehandling/datafiles.py +++ b/ntrfc/utils/filehandling/datafiles.py @@ -82,6 +82,7 @@ def get_directory_structure(rootdir): return dir + def get_filelist_fromdir(path): filelist = [] for r, d, f in os.walk(path): diff --git a/tests/test_ntrfc_case.py b/tests/test_ntrfc_case.py index 061d4dc299a6a1adad5e0027a56242656619e9ba..ac649759c3d8f01a3f7fd23d636527aa98ec60ca 100644 --- a/tests/test_ntrfc_case.py +++ b/tests/test_ntrfc_case.py @@ -22,30 +22,6 @@ def test_casestructure(tmpdir): "directory"].keys(), "error collecting case_structure" -def test_findvarsopts(tmpdir): - import os - from ntrfc.preprocessing.case_creation.create_case import find_vars_opts - from ntrfc.utils.filehandling.datafiles import get_directory_structure - - paramnameone = "parameter_name_one" - paramnametwo = "parameter_name_two" - - filecontent = f""" - <PARAM {paramnameone} PARAM> - <PARAM {paramnametwo} PARAM> - """ - - filename = "simstuff.txt" - - with open(os.path.join(tmpdir, filename), "w") as fhandle: - 
fhandle.write(filecontent) - case_structure = get_directory_structure(tmpdir) - - parameters = find_vars_opts(case_structure, tmpdir.dirname,"PARAM") - assert (parameters[tmpdir.basename][filename][paramnameone] == "PARAM" and parameters[tmpdir.basename][filename][ - paramnametwo] == "PARAM"), "not all variablees were found in test-run" - - def test_template_installations(): """ basic sanity check over the installed templates @@ -63,25 +39,16 @@ def test_templates_params(): """ """ - import os from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES from ntrfc.utils.filehandling.datafiles import yaml_dict_read - from ntrfc.utils.filehandling.datafiles import get_directory_structure - from ntrfc.preprocessing.case_creation.create_case import find_vars_opts, settings_sanity - from ntrfc.utils.dictionaries.dict_utils import merge for name, template in CASE_TEMPLATES.items(): schema = template.schema schema_dict = yaml_dict_read(schema) default_params = {key: value["default"] for (key, value) in schema_dict["properties"].items()} - path = template.path - tpath = os.path.join(path, "..") - case_structure = get_directory_structure(path) - parameters = find_vars_opts(case_structure, tpath, "PARAM") - options = find_vars_opts(case_structure, tpath,"OPTION") - case_settings = merge(parameters,options) - defined, undefined, used, unused = settings_sanity(case_settings, default_params) + + template.set_params_options(default_params,options) assert len(undefined) == 0, f"some parameters have no default: {undefined}" assert len(unused) == 0, f"some parameters are not used: {unused}" @@ -91,10 +58,9 @@ def test_create_case(tmpdir): """ import os - from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES + from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES, deploy from ntrfc.utils.filehandling.datafiles import yaml_dict_read from ntrfc.utils.filehandling.datafiles import create_dirstructure - from 
ntrfc.preprocessing.case_creation.create_case import create_case template = list(CASE_TEMPLATES.values())[0] templatefiles = template.files @@ -109,8 +75,6 @@ def test_create_case(tmpdir): #sim_params = merge(default_params, default_options) os.mkdir(os.path.join(tmpdir, template.name)) - # it is necessary to create the directory structure before creating the files. - # in snakemake this step can be skipped create_dirstructure(directories, os.path.join(tmpdir, template.name)) create_case(input, output, template.name, default_params) @@ -122,7 +86,6 @@ def test_search_paras(tmpdir): import os from ntrfc.utils.filehandling.datafiles import get_directory_structure from ntrfc.utils.dictionaries.dict_utils import nested_dict_pairs_iterator - from ntrfc.preprocessing.case_creation.create_case import search_paras paramnameone = "parameter_name_one" paramnametwo = "parameter_name_two" diff --git a/workflows/case_creation/Snakefile b/workflows/case_creation/Snakefile index 0800c7fbbf61214d191abab34aab1d5c3e5a47dd..6baa28fdd5f26b2aa73aa8e96a385e54df803b59 100644 --- a/workflows/case_creation/Snakefile +++ b/workflows/case_creation/Snakefile @@ -6,40 +6,34 @@ from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES configfile : "casesettings.yaml" -TEMPLATE = CASE_TEMPLATES[config["case_params"]["case_type"]] -PARAMS = pd.read_csv("caseparams.tsv",sep="\t") - - def validate_configuration(config): validate(config,"config.schema.yaml") - validate(PARAMS, TEMPLATE.schema) + template = CASE_TEMPLATES[config["case_params"]["case_type"]] + PARAMS = pd.read_csv("caseparams.tsv",sep="\t") + validate(PARAMS, template.schema) paramspace = Paramspace(PARAMS) - return paramspace, config + return template, paramspace, config -paramspace, config = validate_configuration(config) +template, paramspace, config = validate_configuration(config) rule all: input: # Aggregate over entire parameter space (or a subset thereof if needed) # of course, something like this can happen 
anywhere in the workflow (not # only at the end). - *[f"01_Simulations/{instance_pattern}/{file}" for instance_pattern in paramspace.instance_patterns for file in TEMPLATE.files] + *[f"01_Simulations/{instance_pattern}/{file}" for instance_pattern in paramspace.instance_patterns for file in template.files] rule create_case: input: - [f"{TEMPLATE.path}/{file}" for file in TEMPLATE.files] + [f"{template.path}/{file}" for file in template.files] output: # format a wildcard pattern like "alpha~{alpha}/beta~{beta}/gamma~{gamma}" # into a file path, with alpha, beta, gamma being the columns of the data frame - *[f"01_Simulations/{paramspace.wildcard_pattern}/{file}" for file in TEMPLATE.files] + *[f"01_Simulations/{paramspace.wildcard_pattern}/{file}" for file in template.files] params: - case_type = config["case_params"]["case_type"], # automatically translate the wildcard values into an instance of the param space # in the form of a dict (here: {"alpha": ..., "beta": ..., "gamma": ...}) simparams = paramspace.instance run: - from ntrfc.preprocessing.case_creation.create_case import create_case - from ntrfc.utils.dictionaries.dict_utils import merge - - simconfig = merge(params["simparams"],config["case_options"]) - create_case(input,output,params["case_type"],simconfig) + from ntrfc.database.case_templates.case_templates import deploy + deploy(input,output,params["simparams"],config["case_options"])