Skip to content
Snippets Groups Projects
Commit b6fd7005 authored by Malte Nyhuis's avatar Malte Nyhuis
Browse files

new case_creation. object-based in templates

parent 35eb2869
No related branches found
No related tags found
No related merge requests found
import importlib_resources import importlib_resources
import os import os
import re
from functools import reduce
import shutil
import warnings
from ntrfc.utils.filehandling.datafiles import get_filelist_fromdir from ntrfc.utils.filehandling.datafiles import get_filelist_fromdir, inplace_change
from ntrfc.utils.dictionaries.dict_utils import set_in_dict, nested_dict_pairs_iterator
def get_directory_structure2(rootdir):
    """
    Build a nested dictionary that mirrors the folder layout below ``rootdir``.

    Every directory becomes a dictionary and every file becomes a key whose
    value is ``None``. The returned dictionary describes the content of
    ``rootdir`` itself; the name of ``rootdir`` is not a key of the result.
    """
    tree = {}
    root = os.path.join(rootdir).rstrip(os.sep)
    # index where the last component of root begins, so that each walked
    # path can be cut down to "<root name>/<sub>/<sub>..."
    offset = root.rfind(os.sep) + 1
    for current, _subdirs, filenames in os.walk(root):
        *ancestors, leaf = current[offset:].split(os.sep)
        # descend to the dictionary of the parent folder; os.walk visits
        # parents before their children, so it already exists
        holder = tree
        for name in ancestors:
            holder = holder.get(name)
        holder[leaf] = dict.fromkeys(filenames)
    return tree[os.path.basename(root)]
def find_vars(path_to_sim, sign):
    """
    Collect every placeholder of kind ``sign`` used in the files below ``path_to_sim``.

    : param path_to_sim: path - root directory that is scanned recursively
    : param sign: str - marker of the placeholder kind (e.g. "PARAM" or "OPTION")
    return : dict - maps each placeholder name to a flat list of the files using it
    """
    case_structure = get_directory_structure2(path_to_sim)
    sim_variables = {}
    # presumably each entry is (folder, ..., filename, value); the last element
    # is dropped and the rest is used as path components - verify against
    # nested_dict_pairs_iterator
    for entry in nested_dict_pairs_iterator(case_structure):
        filepath = os.path.join(path_to_sim, *entry[:-1])
        for name, files in find_variables_infile(filepath, sign).items():
            # extend, not append: ``files`` is itself a list and the result
            # has to stay a flat list of file paths
            sim_variables.setdefault(name, []).extend(files)
    return sim_variables
def find_variables_infile(file, sign):
    """
    Find all placeholders of the form ``<SIGN name SIGN>`` in a single file.

    A valid name consists of lowercase words with at least three letters each,
    joined by single underscores (no digits, no whitespace).

    : param file: path - file that is scanned line by line
    : param sign: str - marker of the placeholder kind (e.g. "PARAM" or "OPTION")
    return : dict - maps each placeholder name found to ``[file]``
    raises : AssertionError - if a line still contains ``sign`` after all
             well-formed placeholders were removed (malformed placeholder)
    """
    pattern = re.compile(
        r"<PLACEHOLDER [a-z]{3,}(_{1,1}[a-z]{3,}){,} PLACEHOLDER>".replace("PLACEHOLDER", sign)
    )
    # offsets that cut "<SIGN " from the front and " SIGN>" from the back of a match
    siglim = (len(f"< {sign}"), -(len(f" {sign}>")))
    variables = {}
    with open(file, "r") as fhandle:
        for line in fhandle:
            while True:
                lookup_var = pattern.search(line)
                if not lookup_var:
                    break
                span = lookup_var.span()
                parameter = line[span[0] + siglim[0]:span[1] + siglim[1]]
                occurrences = variables.setdefault(parameter, [])
                if file not in occurrences:
                    occurrences.append(file)
                # drop the handled placeholder so that the next search finds the following one
                line = line.replace(line[span[0]:span[1]], "")
            # whatever is left must not mention the sign anymore, otherwise a
            # placeholder in this line is written incorrectly
            assert sign not in line, f"parameter is not defined correct \n file: {file}\n line: {line}"
    return variables
def deploy(deply_sources, deploy_targets, deploy_params, deploy_options):
    """
    Copy template files to their targets and fill in all placeholders.

    : param deply_sources: iterable of paths - template files to copy
    : param deploy_targets: iterable of paths - destination of each source (paired by position)
    : param deploy_params: dict - name -> value, substituted for ``<PARAM name PARAM>``
    : param deploy_options: dict - name -> value, substituted for ``<OPTION name OPTION>``
    """
    for source, target in zip(deply_sources, deploy_targets):
        target_dir = os.path.dirname(target)
        # a target in the current directory has no directory part; os.makedirs("") would fail
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        shutil.copyfile(source, target)
        # presumably inplace_change replaces the given text inside the target file - verify against its definition
        for parameter in deploy_params:
            inplace_change(target, f"<PARAM {parameter} PARAM>", str(deploy_params[parameter]))
        for option in deploy_options:
            # look up the option by its own name (the parameter loop variable was used here before)
            inplace_change(target, f"<OPTION {option} OPTION>", str(deploy_options[option]))
class case_template:
    """
    Description of one installed case template.

    On construction the template directory is scanned once: the list of its
    files and all ``PARAM``/``OPTION`` placeholders used in them are stored.
    Values for the placeholders are attached later via ``set_params_options``
    and checked for completeness with ``sanity_check``.
    """

    # markers of the two placeholder kinds searched for in the template files
    psign = "PARAM"
    osign = "OPTION"

    def __init__(self, name):
        package_root = importlib_resources.files("ntrfc")
        self.name = name
        self.path = package_root / f"database/case_templates/{name}"
        self.schema = package_root / f"database/case_templates/{name}.schema.yaml"
        # file paths relative to the template directory
        self.files = [os.path.relpath(fpath, self.path) for fpath in get_filelist_fromdir(self.path)]
        # placeholders found in the template, and the values assigned to them (empty until set)
        self.params = find_vars(self.path, self.psign)
        self.params_set = {}
        self.options = find_vars(self.path, self.osign)
        self.options_set = {}

    def set_params_options(self, params_set, options_set):
        """Store the values that are meant to fill the parameter and option placeholders."""
        self.params_set = params_set
        self.options_set = options_set

    def sanity_check(self):
        """
        Check that every placeholder found in the template has a value assigned.

        A warning is issued for each missing name, parameters first, then options.

        return : bool - True if nothing is missing, False otherwise
        """
        unset_params = [p for p in self.params if p not in self.params_set]
        for p in unset_params:
            warnings.warn(f"{p} not set")
        unset_options = [o for o in self.options if o not in self.options_set]
        for o in unset_options:
            warnings.warn(f"{o} not set")
        return not (unset_params or unset_options)
""" """
avail_templates is used to create a dictionary of with case_templates-Objects avail_templates is used to create a dictionary of with case_templates-Objects
......
import copy
import os
import re
import shutil
from ntrfc.utils.filehandling.datafiles import inplace_change, get_directory_structure
from ntrfc.utils.dictionaries.dict_utils import nested_dict_pairs_iterator, set_in_dict, merge
from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES
def search_paras(case_structure, line, pair, siglim, varsignature, sign):
    """
    Register every placeholder found in one line of a file in ``case_structure``.

    : param case_structure: dict - nested case dictionary that is updated via set_in_dict
    : param line: str - line of text that is searched
    : param pair: sequence - all elements but the last are the path components of the file
    : param siglim: tuple - (start offset, negative end offset) that cut the name out of a match
    : param varsignature: str - regular expression matching a complete placeholder
    : param sign: str - marker of the placeholder kind, stored as the value of each name found
    return : dict - the case_structure passed in
    raises : AssertionError - if the line still contains ``sign`` once all matches are removed
    """
    location = list(pair[:-1])
    while True:
        hit = re.search(varsignature, line)
        if not hit:
            break
        begin, end = hit.span()
        parameter = line[begin + siglim[0]:end + siglim[1]]
        set_in_dict(case_structure, location + [parameter], sign)
        # remove the handled placeholder so that the next search finds the following one
        line = line.replace(line[begin:end], "")
    filename = os.path.join(*pair[:-1])
    assert sign not in line, f"parameter is not defined correct \n file: {filename}\n line: {line}"
    return case_structure
def settings_sanity(case_structure, settings_dict):
    """
    Compare the placeholders required by a case with the settings provided.

    : param case_structure: dict - nested case dictionary whose leaves marked
                            "PARAM" or "OPTION" are the required names
    : param settings_dict: dict - provided settings, keyed by name
    return : tuple of four lists - (defined, undefined, used, unused);
             the first two classify the required names, the last two classify
             the keys of settings_dict
    """
    markers = ("PARAM", "OPTION")
    # the element in front of the marker is taken as the name of the placeholder
    required = [entry[-2] for entry in nested_dict_pairs_iterator(case_structure) if entry[-1] in markers]
    provided = list(settings_dict.keys())

    defined = [name for name in required if name in provided]
    undefined = [name for name in required if name not in provided]
    used = [name for name in provided if name in required]
    unused = [name for name in provided if name not in required]
    return defined, undefined, used, unused
def create_case(input_files, output_files, template_name, simparams):
    """
    Create the files of a case from a template and fill in the given settings.

    :param input_files: list of template-files
    :param output_files: list of outputfiles (paired with input_files by position)
    :param template_name: str - template-name, has to be a key of CASE_TEMPLATES
    :param simparams: dict - values for all parameters and options of the template
    :return: None
    :raises AssertionError: if the template is unknown, or if simparams has
                            missing or superfluous entries
    """
    assert template_name in CASE_TEMPLATES.keys(), "template unknown. check ntrfc.database.casetemplates directory"
    template = CASE_TEMPLATES[template_name]

    case_structure = get_directory_structure(template.path)
    template_tree = case_structure[template.name]
    parameters = find_vars_opts(template_tree, template.path, "PARAM")
    options = find_vars_opts(template_tree, template.path, "OPTION")
    case_settings = merge(parameters, options)
    # NOTE(review): the result of this second merge is never used; the call is
    # kept because merge may modify its arguments - confirm and remove
    allparams = merge(parameters, options)

    defined, undefined, used, unused = settings_sanity(case_settings, simparams)
    print("found ", str(len(defined)), " defined parameters")
    print("found ", str(len(undefined)), " undefined parameters")
    print("used ", str(len(used)), " parameters")
    print("unused ", str(len(unused)), " parameters")
    assert len(undefined) == 0, f"undefined parameters: {undefined}"
    assert len(unused) == 0, f"unused parameters: {unused}"

    # remember for each name whether it is written as <PARAM ...> or <OPTION ...>
    paramtypes = {
        entry[-2]: entry[-1]
        for entry in nested_dict_pairs_iterator(case_settings)
        if entry[-1] in ("PARAM", "OPTION")
    }

    for templatefile, simfile in zip(input_files, output_files):
        shutil.copyfile(templatefile, simfile)
        for parameter in used:
            sign = paramtypes[parameter]
            inplace_change(simfile, f"<{sign} {parameter} {sign}>", str(simparams[parameter]))
def find_vars_opts(case_structure, path_to_sim, sign):
    """
    Scan all files of a case for placeholders of kind ``sign``.

    : param case_structure: dict - nested dictionary describing the files of the case
    : param path_to_sim: path - directory the entries of case_structure are relative to
    : param sign: str - marker of the placeholder kind (e.g. "PARAM" or "OPTION")
    return : dict - deep copy of case_structure in which each file entry is
             replaced by a dictionary that is filled by search_paras
    """
    # accepted names: lowercase words of at least three letters, joined by
    # single underscores - no digits, no whitespaces
    varsignature = r"<PLACEHOLDER [a-z]{3,}(_{1,1}[a-z]{3,}){,} PLACEHOLDER>".replace("PLACEHOLDER", sign)
    # offsets that cut "<SIGN " from the front and " SIGN>" from the back of a match
    siglim = (len(f"< {sign}"), -(len(f" {sign}>")))

    datadict = copy.deepcopy(case_structure)
    for pair in list(nested_dict_pairs_iterator(case_structure)):
        location = pair[:-1]
        # each file starts with an empty dictionary that collects its placeholders
        set_in_dict(datadict, location, {})
        target = os.path.join(path_to_sim, os.path.join(*location))
        with open(target, "r") as fhandle:
            content = fhandle.readlines()
        for line in content:
            datadict = search_paras(datadict, line, pair, siglim, varsignature, sign)
    return datadict
...@@ -82,6 +82,7 @@ def get_directory_structure(rootdir): ...@@ -82,6 +82,7 @@ def get_directory_structure(rootdir):
return dir return dir
def get_filelist_fromdir(path): def get_filelist_fromdir(path):
filelist = [] filelist = []
for r, d, f in os.walk(path): for r, d, f in os.walk(path):
......
...@@ -22,30 +22,6 @@ def test_casestructure(tmpdir): ...@@ -22,30 +22,6 @@ def test_casestructure(tmpdir):
"directory"].keys(), "error collecting case_structure" "directory"].keys(), "error collecting case_structure"
def test_findvarsopts(tmpdir):
    """
    Check that find_vars_opts reports both PARAM placeholders written to a file.
    """
    import os
    from ntrfc.preprocessing.case_creation.create_case import find_vars_opts
    from ntrfc.utils.filehandling.datafiles import get_directory_structure
    paramnameone = "parameter_name_one"
    paramnametwo = "parameter_name_two"
    # file content with one placeholder per line
    filecontent = f"""
<PARAM {paramnameone} PARAM>
<PARAM {paramnametwo} PARAM>
"""
    filename = "simstuff.txt"
    with open(os.path.join(tmpdir, filename), "w") as fhandle:
        fhandle.write(filecontent)
    case_structure = get_directory_structure(tmpdir)
    # the parent of tmpdir is passed as root; the result is indexed by tmpdir.basename below
    parameters = find_vars_opts(case_structure, tmpdir.dirname,"PARAM")
    # both names have to be registered with the sign as their value
    assert (parameters[tmpdir.basename][filename][paramnameone] == "PARAM" and parameters[tmpdir.basename][filename][
        paramnametwo] == "PARAM"), "not all variablees were found in test-run"
def test_template_installations(): def test_template_installations():
""" """
basic sanity check over the installed templates basic sanity check over the installed templates
...@@ -63,25 +39,16 @@ def test_templates_params(): ...@@ -63,25 +39,16 @@ def test_templates_params():
""" """
""" """
import os
from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES
from ntrfc.utils.filehandling.datafiles import yaml_dict_read from ntrfc.utils.filehandling.datafiles import yaml_dict_read
from ntrfc.utils.filehandling.datafiles import get_directory_structure
from ntrfc.preprocessing.case_creation.create_case import find_vars_opts, settings_sanity
from ntrfc.utils.dictionaries.dict_utils import merge
for name, template in CASE_TEMPLATES.items(): for name, template in CASE_TEMPLATES.items():
schema = template.schema schema = template.schema
schema_dict = yaml_dict_read(schema) schema_dict = yaml_dict_read(schema)
default_params = {key: value["default"] for (key, value) in schema_dict["properties"].items()} default_params = {key: value["default"] for (key, value) in schema_dict["properties"].items()}
path = template.path
tpath = os.path.join(path, "..") template.set_params_options(default_params,options)
case_structure = get_directory_structure(path)
parameters = find_vars_opts(case_structure, tpath, "PARAM")
options = find_vars_opts(case_structure, tpath,"OPTION")
case_settings = merge(parameters,options)
defined, undefined, used, unused = settings_sanity(case_settings, default_params)
assert len(undefined) == 0, f"some parameters have no default: {undefined}" assert len(undefined) == 0, f"some parameters have no default: {undefined}"
assert len(unused) == 0, f"some parameters are not used: {unused}" assert len(unused) == 0, f"some parameters are not used: {unused}"
...@@ -91,10 +58,9 @@ def test_create_case(tmpdir): ...@@ -91,10 +58,9 @@ def test_create_case(tmpdir):
""" """
import os import os
from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES, deploy
from ntrfc.utils.filehandling.datafiles import yaml_dict_read from ntrfc.utils.filehandling.datafiles import yaml_dict_read
from ntrfc.utils.filehandling.datafiles import create_dirstructure from ntrfc.utils.filehandling.datafiles import create_dirstructure
from ntrfc.preprocessing.case_creation.create_case import create_case
template = list(CASE_TEMPLATES.values())[0] template = list(CASE_TEMPLATES.values())[0]
templatefiles = template.files templatefiles = template.files
...@@ -109,8 +75,6 @@ def test_create_case(tmpdir): ...@@ -109,8 +75,6 @@ def test_create_case(tmpdir):
#sim_params = merge(default_params, default_options) #sim_params = merge(default_params, default_options)
os.mkdir(os.path.join(tmpdir, template.name)) os.mkdir(os.path.join(tmpdir, template.name))
# it is necessary to create the directory structure before creating the files.
# in snakemake this step can be skipped
create_dirstructure(directories, os.path.join(tmpdir, template.name)) create_dirstructure(directories, os.path.join(tmpdir, template.name))
create_case(input, output, template.name, default_params) create_case(input, output, template.name, default_params)
...@@ -122,7 +86,6 @@ def test_search_paras(tmpdir): ...@@ -122,7 +86,6 @@ def test_search_paras(tmpdir):
import os import os
from ntrfc.utils.filehandling.datafiles import get_directory_structure from ntrfc.utils.filehandling.datafiles import get_directory_structure
from ntrfc.utils.dictionaries.dict_utils import nested_dict_pairs_iterator from ntrfc.utils.dictionaries.dict_utils import nested_dict_pairs_iterator
from ntrfc.preprocessing.case_creation.create_case import search_paras
paramnameone = "parameter_name_one" paramnameone = "parameter_name_one"
paramnametwo = "parameter_name_two" paramnametwo = "parameter_name_two"
......
...@@ -6,40 +6,34 @@ from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES ...@@ -6,40 +6,34 @@ from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES
configfile : "casesettings.yaml" configfile : "casesettings.yaml"
TEMPLATE = CASE_TEMPLATES[config["case_params"]["case_type"]]
PARAMS = pd.read_csv("caseparams.tsv",sep="\t")
def validate_configuration(config): def validate_configuration(config):
validate(config,"config.schema.yaml") validate(config,"config.schema.yaml")
validate(PARAMS, TEMPLATE.schema) template = CASE_TEMPLATES[config["case_params"]["case_type"]]
PARAMS = pd.read_csv("caseparams.tsv",sep="\t")
validate(PARAMS, template.schema)
paramspace = Paramspace(PARAMS) paramspace = Paramspace(PARAMS)
return paramspace, config return template, paramspace, config
paramspace, config = validate_configuration(config) template, paramspace, config = validate_configuration(config)
rule all: rule all:
input: input:
# Aggregate over entire parameter space (or a subset thereof if needed) # Aggregate over entire parameter space (or a subset thereof if needed)
# of course, something like this can happen anywhere in the workflow (not # of course, something like this can happen anywhere in the workflow (not
# only at the end). # only at the end).
*[f"01_Simulations/{instance_pattern}/{file}" for instance_pattern in paramspace.instance_patterns for file in TEMPLATE.files] *[f"01_Simulations/{instance_pattern}/{file}" for instance_pattern in paramspace.instance_patterns for file in template.files]
rule create_case: rule create_case:
input: input:
[f"{TEMPLATE.path}/{file}" for file in TEMPLATE.files] [f"{template.path}/{file}" for file in template.files]
output: output:
# format a wildcard pattern like "alpha~{alpha}/beta~{beta}/gamma~{gamma}" # format a wildcard pattern like "alpha~{alpha}/beta~{beta}/gamma~{gamma}"
# into a file path, with alpha, beta, gamma being the columns of the data frame # into a file path, with alpha, beta, gamma being the columns of the data frame
*[f"01_Simulations/{paramspace.wildcard_pattern}/{file}" for file in TEMPLATE.files] *[f"01_Simulations/{paramspace.wildcard_pattern}/{file}" for file in template.files]
params: params:
case_type = config["case_params"]["case_type"],
# automatically translate the wildcard values into an instance of the param space # automatically translate the wildcard values into an instance of the param space
# in the form of a dict (here: {"alpha": ..., "beta": ..., "gamma": ...}) # in the form of a dict (here: {"alpha": ..., "beta": ..., "gamma": ...})
simparams = paramspace.instance simparams = paramspace.instance
run: run:
from ntrfc.preprocessing.case_creation.create_case import create_case from ntrfc.database.case_templates.case_templates import deploy
from ntrfc.utils.dictionaries.dict_utils import merge deploy(input,output,params["simparams"],config["case_options"])
simconfig = merge(params["simparams"],config["case_options"])
create_case(input,output,params["case_type"],simconfig)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment