Skip to content
Snippets Groups Projects
Commit de6cdbc9 authored by Malte Nyhuis's avatar Malte Nyhuis
Browse files

refactoring workflow

parent b6fd7005
No related branches found
No related tags found
No related merge requests found
import os
import re
import shutil
import warnings
from functools import reduce
import importlib_resources
from ntrfc.utils.dictionaries.dict_utils import nested_dict_pairs_iterator
from ntrfc.utils.filehandling.datafiles import inplace_change, get_filelist_fromdir
def get_directory_structure2(rootdir):
    """
    Build a nested dictionary mirroring the directory tree below rootdir.

    Sub-directories map to nested dictionaries, files map to None.

    :param rootdir: path-like - directory to scan
    :return: dict - content of rootdir (the root folder itself is not a key)
    """
    tree = {}
    # plain string without trailing separator, so the slicing below works
    root = os.fspath(rootdir).rstrip(os.sep)
    root_name = os.path.basename(root)
    # offset of the root folder name inside every path yielded by os.walk
    offset = root.rfind(os.sep) + 1
    for current, _subdirs, filenames in os.walk(root):
        *ancestors, leaf = current[offset:].split(os.sep)
        # descend to the dictionary of the parent folder ...
        node = tree
        for name in ancestors:
            node = node.get(name)
        # ... and register the current folder together with its files
        node[leaf] = dict.fromkeys(filenames)
    return tree[root_name]
def find_vars(path_to_sim, sign):
    """
    Collect every variable marked with sign in the files below path_to_sim.

    :param path_to_sim: path - path-like object, root directory that is scanned
    :param sign: str - marker keyword of a variable (e.g. "PARAM" for "<PARAM name PARAM>")
    :return: dict - variable name -> flat list of file paths that use the variable
    """
    case_structure = get_directory_structure2(path_to_sim)
    sim_variables = {}
    for entry in nested_dict_pairs_iterator(case_structure):
        # presumably each entry is (key, ..., key, value); the trailing value
        # is dropped so only the path components remain - TODO confirm
        filepath = os.path.join(path_to_sim, *entry[:-1])
        for name, files in find_variables_infile(filepath, sign).items():
            # extend a list owned by this function: appending the per-file
            # list itself produced nested lists like [file_a, [file_b]]
            sim_variables.setdefault(name, []).extend(files)
    return sim_variables
def find_variables_infile(file, sign):
    """
    Scan a single file for variables written as "<SIGN name SIGN>".

    A name is accepted when it consists of lower-case words of at least three
    letters each, joined by single underscores.

    :param file: path - file to scan
    :param sign: str - marker keyword, e.g. "PARAM" or "OPTION"
    :return: dict - variable name -> [file]
    :raises AssertionError: if a line still contains sign after all well-formed
        variables have been removed from it
    """
    varsignature = re.compile(
        r"<PLACEHOLDER [a-z]{3,}(_{1,1}[a-z]{3,}){,} PLACEHOLDER>".replace("PLACEHOLDER", sign)
    )
    # number of characters enclosing the name on the left and on the right
    siglim = (len(f"< {sign}"), -(len(f" {sign}>")))
    variables = {}
    with open(file, "r") as fhandle:
        for line in fhandle:
            while True:
                lookup_var = varsignature.search(line)
                if lookup_var is None:
                    break
                start, end = lookup_var.span()
                parameter = line[start + siglim[0]:end + siglim[1]]
                if parameter not in variables:
                    variables[parameter] = [file]
                # drop the handled variable so the next search finds a new one
                line = line.replace(lookup_var.group(0), "")
            # whatever is left must not mention the sign any more; the message
            # used the undefined name "filepath" before and raised NameError
            assert sign not in line, f"parameter is not defined correct \n file: {file}\n line: {line}"
    return variables
def deploy(deply_sources, deploy_targets, deploy_params, deploy_options):
    """
    Copy template files to their targets and fill in parameters and options.

    :param deply_sources: iterable of paths - template files to copy
    :param deploy_targets: iterable of paths - destination of each source (paired by position)
    :param deploy_params: dict - name -> value substituted for "<PARAM name PARAM>"
    :param deploy_options: dict - name -> value substituted for "<OPTION name OPTION>"
    """
    for source, target in zip(deply_sources, deploy_targets):
        target_dir = os.path.dirname(target)
        # a bare file name has no directory part and os.makedirs("") raises
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        shutil.copyfile(source, target)
        # inplace_change presumably rewrites target, replacing the placeholder
        # text with the given string - TODO confirm against its implementation
        for parameter in deploy_params:
            inplace_change(target, f"<PARAM {parameter} PARAM>", str(deploy_params[parameter]))
        for option in deploy_options:
            # look the value up by option; the former lookup by "parameter"
            # used the stale loop variable of the parameter loop
            inplace_change(target, f"<OPTION {option} OPTION>", str(deploy_options[option]))
class case_template:
    """
    Case template that is shipped inside the ntrfc package.

    On construction the template directory is scanned for the files it
    contains and for the parameters and options used in these files.
    """

    # marker keywords enclosing parameter and option names in template files
    psign = "PARAM"
    osign = "OPTION"

    def __init__(self, name):
        """
        :param name: str - name of the template folder below database/case_templates
        """
        package_root = importlib_resources.files("ntrfc")
        self.name = name
        self.path = package_root / f"database/case_templates/{name}"
        self.schema = package_root / f"database/case_templates/{name}.schema.yaml"
        # template files, relative to the template directory
        self.files = [
            os.path.relpath(template_file, self.path)
            for template_file in get_filelist_fromdir(self.path)
        ]
        self.params = find_vars(self.path, self.psign)
        self.params_set = {}
        self.options = find_vars(self.path, self.osign)
        self.options_set = {}

    def set_params_options(self, params_set, options_set):
        """
        Store the values chosen for the parameters and options of the template.

        :param params_set: dict - parameter name -> value
        :param options_set: dict - option name -> value
        """
        self.params_set, self.options_set = params_set, options_set

    def sanity_check(self):
        """
        Check that every parameter and option found in the template has a value.

        A warning is emitted for each name without a value.

        :return: bool - True when nothing is missing
        """
        missing_params = [name for name in self.params if name not in self.params_set]
        for p in missing_params:
            warnings.warn(f"{p} not set")
        missing_options = [name for name in self.options if name not in self.options_set]
        for o in missing_options:
            warnings.warn(f"{o} not set")
        return not missing_params and not missing_options
class dynamic_case_template(case_template):
    """
    Case template that lives at a caller-supplied path instead of inside the
    ntrfc package.
    """

    def __init__(self, name, path, schema):
        """
        :param name: str - name of the template
        :param path: path - directory that holds the template files
        :param schema: path - schema file belonging to the template
        """
        # The former "super.__init__(name)" lacked the call parentheses and
        # always raised TypeError. The parent initialiser is deliberately not
        # invoked: it derives a packaged template directory from name and
        # scans it, and every attribute it sets is replaced below anyway.
        self.name = name
        self.path = path
        self.schema = schema
        # template files, relative to the template directory
        self.files = [os.path.relpath(fpath, self.path) for fpath in get_filelist_fromdir(self.path)]
        self.params = find_vars(self.path, self.psign)
        self.params_set = {}
        self.options = find_vars(self.path, self.osign)
        self.options_set = {}
import importlib_resources from ntrfc.database.case_templates.case_creation import case_template, dynamic_case_template
import os
import re
from functools import reduce
import shutil
import warnings
from ntrfc.utils.filehandling.datafiles import get_filelist_fromdir, inplace_change
from ntrfc.utils.dictionaries.dict_utils import set_in_dict, nested_dict_pairs_iterator
def get_directory_structure2(rootdir):
    """
    Build a nested dictionary mirroring the directory tree below rootdir.

    Sub-directories map to nested dictionaries, files map to None.

    :param rootdir: path-like - directory to scan
    :return: dict - content of rootdir (the root folder itself is not a key)
    """
    tree = {}
    # plain string without trailing separator, so the slicing below works
    root = os.fspath(rootdir).rstrip(os.sep)
    root_name = os.path.basename(root)
    # offset of the root folder name inside every path yielded by os.walk
    offset = root.rfind(os.sep) + 1
    for current, _subdirs, filenames in os.walk(root):
        *ancestors, leaf = current[offset:].split(os.sep)
        # descend to the dictionary of the parent folder ...
        node = tree
        for name in ancestors:
            node = node.get(name)
        # ... and register the current folder together with its files
        node[leaf] = dict.fromkeys(filenames)
    return tree[root_name]
def find_vars(path_to_sim, sign):
    """
    Collect every variable marked with sign in the files below path_to_sim.

    :param path_to_sim: path - path-like object, root directory that is scanned
    :param sign: str - marker keyword of a variable (e.g. "PARAM" for "<PARAM name PARAM>")
    :return: dict - variable name -> flat list of file paths that use the variable
    """
    case_structure = get_directory_structure2(path_to_sim)
    sim_variables = {}
    for entry in nested_dict_pairs_iterator(case_structure):
        # presumably each entry is (key, ..., key, value); the trailing value
        # is dropped so only the path components remain - TODO confirm
        filepath = os.path.join(path_to_sim, *entry[:-1])
        for name, files in find_variables_infile(filepath, sign).items():
            # extend a list owned by this function: appending the per-file
            # list itself produced nested lists like [file_a, [file_b]]
            sim_variables.setdefault(name, []).extend(files)
    return sim_variables
def find_variables_infile(file, sign):
    """
    Scan a single file for variables written as "<SIGN name SIGN>".

    A name is accepted when it consists of lower-case words of at least three
    letters each, joined by single underscores.

    :param file: path - file to scan
    :param sign: str - marker keyword, e.g. "PARAM" or "OPTION"
    :return: dict - variable name -> [file]
    :raises AssertionError: if a line still contains sign after all well-formed
        variables have been removed from it
    """
    varsignature = re.compile(
        r"<PLACEHOLDER [a-z]{3,}(_{1,1}[a-z]{3,}){,} PLACEHOLDER>".replace("PLACEHOLDER", sign)
    )
    # number of characters enclosing the name on the left and on the right
    siglim = (len(f"< {sign}"), -(len(f" {sign}>")))
    variables = {}
    with open(file, "r") as fhandle:
        for line in fhandle:
            while True:
                lookup_var = varsignature.search(line)
                if lookup_var is None:
                    break
                start, end = lookup_var.span()
                parameter = line[start + siglim[0]:end + siglim[1]]
                if parameter not in variables:
                    variables[parameter] = [file]
                # drop the handled variable so the next search finds a new one
                line = line.replace(lookup_var.group(0), "")
            # whatever is left must not mention the sign any more; the message
            # used the undefined name "filepath" before and raised NameError
            assert sign not in line, f"parameter is not defined correct \n file: {file}\n line: {line}"
    return variables
def deploy(deply_sources, deploy_targets, deploy_params, deploy_options):
    """
    Copy template files to their targets and fill in parameters and options.

    :param deply_sources: iterable of paths - template files to copy
    :param deploy_targets: iterable of paths - destination of each source (paired by position)
    :param deploy_params: dict - name -> value substituted for "<PARAM name PARAM>"
    :param deploy_options: dict - name -> value substituted for "<OPTION name OPTION>"
    """
    for source, target in zip(deply_sources, deploy_targets):
        target_dir = os.path.dirname(target)
        # a bare file name has no directory part and os.makedirs("") raises
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        shutil.copyfile(source, target)
        # inplace_change presumably rewrites target, replacing the placeholder
        # text with the given string - TODO confirm against its implementation
        for parameter in deploy_params:
            inplace_change(target, f"<PARAM {parameter} PARAM>", str(deploy_params[parameter]))
        for option in deploy_options:
            # look the value up by option; the former lookup by "parameter"
            # used the stale loop variable of the parameter loop
            inplace_change(target, f"<OPTION {option} OPTION>", str(deploy_options[option]))
class case_template:
    """
    Case template that is shipped inside the ntrfc package.

    On construction the template directory is scanned for the files it
    contains and for the parameters and options used in these files.
    """

    # marker keywords enclosing parameter and option names in template files
    psign = "PARAM"
    osign = "OPTION"

    def __init__(self, name):
        """
        :param name: str - name of the template folder below database/case_templates
        """
        package_root = importlib_resources.files("ntrfc")
        self.name = name
        self.path = package_root / f"database/case_templates/{name}"
        self.schema = package_root / f"database/case_templates/{name}.schema.yaml"
        # template files, relative to the template directory
        self.files = [
            os.path.relpath(template_file, self.path)
            for template_file in get_filelist_fromdir(self.path)
        ]
        self.params = find_vars(self.path, self.psign)
        self.params_set = {}
        self.options = find_vars(self.path, self.osign)
        self.options_set = {}

    def set_params_options(self, params_set, options_set):
        """
        Store the values chosen for the parameters and options of the template.

        :param params_set: dict - parameter name -> value
        :param options_set: dict - option name -> value
        """
        self.params_set, self.options_set = params_set, options_set

    def sanity_check(self):
        """
        Check that every parameter and option found in the template has a value.

        A warning is emitted for each name without a value.

        :return: bool - True when nothing is missing
        """
        missing_params = [name for name in self.params if name not in self.params_set]
        for p in missing_params:
            warnings.warn(f"{p} not set")
        missing_options = [name for name in self.options if name not in self.options_set]
        for o in missing_options:
            warnings.warn(f"{o} not set")
        return not missing_params and not missing_options
""" """
avail_templates is used to create a dictionary of case_templates-Objects avail_templates is used to create a dictionary of case_templates-Objects
......
...@@ -58,7 +58,8 @@ def test_create_case(tmpdir): ...@@ -58,7 +58,8 @@ def test_create_case(tmpdir):
""" """
import os import os
from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES, deploy from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES
from database.case_templates.case_creation import deploy
from ntrfc.utils.filehandling.datafiles import yaml_dict_read from ntrfc.utils.filehandling.datafiles import yaml_dict_read
from ntrfc.utils.filehandling.datafiles import create_dirstructure from ntrfc.utils.filehandling.datafiles import create_dirstructure
......
include: "rules/create_cases.smk"
rule all:
input : *get_casefiles()
...@@ -4,24 +4,23 @@ import pandas as pd ...@@ -4,24 +4,23 @@ import pandas as pd
from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES from ntrfc.database.case_templates.case_templates import CASE_TEMPLATES
configfile : "casesettings.yaml" configfile : "config/casesettings.yaml"
def validate_configuration(config): def validate_configuration(config):
validate(config,"config.schema.yaml") validate(config,"../schemas/config.schema.yaml")
template = CASE_TEMPLATES[config["case_params"]["case_type"]] template = CASE_TEMPLATES[config["case_params"]["case_type"]]
PARAMS = pd.read_csv("caseparams.tsv",sep="\t") PARAMS = pd.read_csv("config/caseparams.tsv",sep="\t")
validate(PARAMS, template.schema) validate(PARAMS, template.schema)
paramspace = Paramspace(PARAMS) paramspace = Paramspace(PARAMS)
return template, paramspace, config return template, paramspace, config
template, paramspace, config = validate_configuration(config) template, paramspace, config = validate_configuration(config)
rule all: def get_casefiles():
input: return [f"results/simulations/{instance_pattern}/{file}" for instance_pattern in paramspace.instance_patterns for file
# Aggregate over entire parameter space (or a subset thereof if needed) in template.files]
# of course, something like this can happen anywhere in the workflow (not
# only at the end).
*[f"01_Simulations/{instance_pattern}/{file}" for instance_pattern in paramspace.instance_patterns for file in template.files]
rule create_case: rule create_case:
input: input:
...@@ -29,11 +28,12 @@ rule create_case: ...@@ -29,11 +28,12 @@ rule create_case:
output: output:
# format a wildcard pattern like "alpha~{alpha}/beta~{beta}/gamma~{gamma}" # format a wildcard pattern like "alpha~{alpha}/beta~{beta}/gamma~{gamma}"
# into a file path, with alpha, beta, gamma being the columns of the data frame # into a file path, with alpha, beta, gamma being the columns of the data frame
*[f"01_Simulations/{paramspace.wildcard_pattern}/{file}" for file in template.files] *[f"results/simulations/{paramspace.wildcard_pattern}/{file}" for file in template.files]
params: params:
# automatically translate the wildcard values into an instance of the param space # automatically translate the wildcard values into an instance of the param space
# in the form of a dict (here: {"alpha": ..., "beta": ..., "gamma": ...}) # in the form of a dict (here: {"alpha": ..., "beta": ..., "gamma": ...})
simparams = paramspace.instance simparams = paramspace.instance
run: run:
from ntrfc.database.case_templates.case_templates import deploy from ntrfc.database.case_templates.case_creation import deploy
deploy(input,output,params["simparams"],config["case_options"]) deploy(input,output,params["simparams"],config["case_options"])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment