Source code for aiida_vasp.parsers.content_parsers.win

"""
The .win parser interface.

=========================

Contains routines to parse Wannier90 input files. Will in the future utilize
the parser in the Wannier90 plugin, but no input parser exists yet.
"""

import re
from typing import Any, Callable


[docs] class BaseKeyValueParser: # pylint: disable=useless-object-inheritance """ Common codebase for all parser utilities. This class provides utility methods for parsing key-value and line-based data. """ empty_line = re.compile(r'[\r\n]\s*[\r\n]')
[docs] @classmethod def line(cls, fobj_or_str: str | Any, d_type: type = str) -> Any: """ Grab a line from a file object or string and convert it to ``d_type`` (default: ``str``). :param fobj_or_str: File object or string to read the line from. :type fobj_or_str: file or str :param d_type: Type to convert each item in the line to. Defaults to ``str``. :type d_type: type :return: Single value or list of values, depending on the number of items in the line. """ if isinstance(fobj_or_str, str): line = fobj_or_str else: line = fobj_or_str.readline() # previously this was map instead of list comprehension res = [d_type(item) for item in line.split()] if len(res) == 1: return res[0] return res
[docs] @classmethod def splitlines(cls, fobj_or_str: str | Any, d_type: type = float) -> list[Any]: """ Split a chunk of text into a list of lines and convert each line to ``d_type`` (default: ``float``). :param fobj_or_str: File object or string to split into lines. :type fobj_or_str: file or str :param d_type: Type to convert each item in the line to. Defaults to ``float``. :type d_type: type :return: List of values, one per line. """ if isinstance(fobj_or_str, str): lines = fobj_or_str.split('\n') else: lines = fobj_or_str.readlines() return [cls.line(line, d_type) for line in lines]
class KeyValueParser(BaseKeyValueParser):
    """
    Key and value parser.

    This base class provides utility functions for parsing files that are (mostly) in a
    ``key = value`` format.

    .. note:: This class does not integrate with the ``VaspParser`` class currently.

    Example usage::

        import re
        from pathlib import Path

        from aiida_vasp.parsers.content_parsers.win import KeyValueParser

        class ParamParser(KeyValueParser):

            def __init__(self, file_path):
                self._file_path = Path(file_path)
                super().__init__()
                self.result = self.parse_file()

            def convert_or_not(self, value):
                for converter in self.get_converter_iter():
                    converted = self.try_convert(value, converter)
                    if converted and 'value' in converted:
                        return converted['value']
                return value

            def parse_file(self):
                assignments = re.findall(self.assignment, self._file_path.read_text())
                return {key: self.convert_or_not(value) for key, value in assignments}

    Parses files like::

        StrParam = value_1
        FloatParam = 1.0
        BoolParam = T

    Only the literal tokens ``T`` and ``F`` are recognised as booleans (see ``bool_true`` and
    ``bool_false``); anything else falls through to the integer, float and string converters.
    """

    # ``key <sep> value`` where <sep> is '=', ':' or a space; the value runs to ';' or end of line.
    assignment = re.compile(r'(\w+)\s*[=: ]\s*([^;\n]*);?')
    bool_true = re.compile(r'^T$')
    bool_false = re.compile(r'^F$')
    comment = True

    @classmethod
    def get_lines(cls, filename: str) -> list[str]:
        """
        Read all lines from a file.

        :param filename: Path to the file.
        :type filename: str
        :return: List of lines from the file, without line terminators.
        :rtype: list[str]
        """
        with open(filename, 'r', encoding='utf8') as input_file:
            return input_file.read().splitlines()

    @classmethod
    def retval(cls, *args: Any, **kwargs: Any) -> dict[str, Any]:
        """
        Normalize return values from value conversion functions.

        A single positional argument is stored as-is under ``'value'``; several positional
        arguments are stored as a list. Keyword arguments are merged into the result.

        :return: Dictionary with the value and any additional keyword arguments.
        :rtype: dict
        """
        val = list(args)
        if len(val) == 1:
            val = val[0]
        ret = {'value': val}
        ret.update(kwargs)
        return ret

    @classmethod
    def flatten(cls, lst: list[list[Any]]) -> list[Any]:
        """
        Flatten a list of lists into a single list.

        :param lst: List of lists.
        :type lst: list
        :return: Flattened list.
        :rtype: list
        """
        return [item for sublist in lst for item in sublist]

    @classmethod
    def find_kv(cls, line: str) -> list[tuple[str, str]]:
        """
        Find key-value pairs in a line using the assignment regex.

        :param line: Line to search for key-value pairs.
        :type line: str
        :return: List of (key, value) tuples.
        :rtype: list[tuple]
        """
        return re.findall(cls.assignment, line)

    @classmethod
    def float(cls, string_: str) -> dict[str, Any]:
        """
        Parse a string into a float value followed by a comment.

        :param string_: String to parse.
        :type string_: str
        :return: Dictionary with value and comment.
        :rtype: dict
        :raises ValueError: If the first word is not a valid float.
        :raises IndexError: If the string contains no words.
        """
        vals = string_.split()
        value = float(vals.pop(0))
        comment = ' '.join(vals)
        return cls.retval(value, comment=comment)

    @classmethod
    def float_unit(cls, string_: str) -> dict[str, Any]:
        """
        Parse string into a float number with attached unit.

        :param string_: String to parse.
        :type string_: str
        :return: Dictionary whose value is ``[number, unit]`` plus a comment; the unit is an
            empty string when the input has only one word.
        :rtype: dict
        :raises ValueError: If the first word is not a valid float.
        :raises IndexError: If the string contains no words.
        """
        vals = string_.split()
        value = float(vals.pop(0))
        unit = vals.pop(0) if vals else ''
        comment = ' '.join(vals)
        return cls.retval(value, unit, comment=comment)

    @classmethod
    def int(cls, string_: str) -> dict[str, Any]:
        """
        Parse a string into an integer value followed by a comment.

        :param string_: String to parse.
        :type string_: str
        :return: Dictionary with value and comment.
        :rtype: dict
        :raises ValueError: If the first word is not a valid integer.
        :raises IndexError: If the string contains no words.
        """
        vals = string_.split()
        value = int(vals.pop(0))
        comment = ' '.join(vals)
        return cls.retval(value, comment=comment)

    @classmethod
    def int_unit(cls, string_: str) -> dict[str, Any]:
        """
        Convert a string into a python value, associated unit and optional comment.

        :param string_: String to parse.
        :type string_: str
        :return: Dictionary whose value is ``[number, unit]`` plus a comment; the unit is an
            empty string when the input has only one word.
        :rtype: dict
        :raises ValueError: If the first word is not a valid integer.
        :raises IndexError: If the string contains no words.
        """
        vals = string_.split()
        value = int(vals.pop(0))
        unit = vals.pop(0) if vals else ''
        comment = ' '.join(vals)
        return cls.retval(value, unit, comment=comment)

    @classmethod
    def string(cls, string_: str) -> dict[str, Any]:
        """
        Parse a string into value and comment, assuming only the first word is the value.

        :param string_: String to parse.
        :type string_: str
        :return: Dictionary with value and comment.
        :rtype: dict
        :raises IndexError: If the string contains no words.
        """
        vals = string_.split()
        value = vals.pop(0)
        comment = ' '.join(vals)
        return cls.retval(value, comment=comment)

    @classmethod
    def bool(cls, string_: str) -> dict[str, Any]:
        """
        Parse string into a boolean value.

        :param string_: String to parse.
        :type string_: str
        :return: Dictionary with value and comment.
        :rtype: dict
        :raises ValueError: If the string does not match a boolean pattern.
        :raises IndexError: If the string contains no words.
        """
        vals = string_.split()
        bool_str = vals.pop(0)
        if re.match(cls.bool_true, bool_str):
            value = True
        elif re.match(cls.bool_false, bool_str):
            value = False
        else:
            raise ValueError(
                f'bool string {string_} did not match any of {[cls.bool_true.pattern, cls.bool_false.pattern]}'
            )
        comment = ' '.join(vals)
        return cls.retval(value, comment=comment)

    @classmethod
    def kv_list(cls, filename: str) -> list[Any]:
        """
        Read a file and return a list of key-value pairs for each line.

        Lines that yield no key-value pair are left out.

        :param filename: Path to the file.
        :type filename: str
        :return: List with, for every matching line, the list of (key, value) tuples found.
        :rtype: list
        """
        with open(filename, 'r', encoding='utf8') as input_fo:
            # Build the list while the file is still open: a lazy filter/map would only read
            # the file when consumed, i.e. after the ``with`` block has already closed it.
            return [pairs for pairs in map(cls.find_kv, input_fo) if pairs]

    @classmethod
    def kv_dict(cls, kv_list: list[Any]) -> dict[str, Any]:
        """
        Convert a list of key-value pairs into a dictionary.

        :param kv_list: List of lists of (key, value) tuples, as returned by :meth:`kv_list`.
        :type kv_list: list
        :return: Dictionary of key-value pairs.
        :rtype: dict
        """
        return dict(cls.flatten(kv_list))

    @classmethod
    def clean_value(cls, str_value: str) -> dict[str, Any]:
        """
        Get the converted python value from a string.

        The converters from :meth:`get_converter_iter` are tried in order and the first
        successful conversion wins. Empty or whitespace-only strings are returned unconverted.

        :param str_value: String value to convert.
        :type str_value: str
        :return: Dictionary with the converted value.
        :rtype: dict
        """
        # Every converter pops the first word, so a string without words cannot be converted.
        if isinstance(str_value, str) and not str_value.strip():
            return cls.retval(str_value)
        for converter in cls.get_converter_iter():
            cleaned_value = cls.try_convert(str_value, converter)
            if cleaned_value is not None:
                return cleaned_value
        # Not expected to be reached (the string converter accepts any word); kept as a
        # safe fallback instead of running off the end of the converter iterator.
        return cls.retval(str_value)

    @classmethod
    def get_converter_iter(cls) -> Any:
        """
        Get an iterator over the value converter functions in order.

        The order is boolean, integer, float, string, i.e. from most to least specific.

        :return: Iterator over converter functions.
        """
        converter_order = [cls.bool, cls.int, cls.float, cls.string]
        return iter(converter_order)

    @classmethod
    def try_convert(cls, input_value: str, converter: Callable[[str], dict[str, Any]]) -> dict[str, Any] | None:
        """
        Try to convert the input string into a python value given a conversion function.

        Values that are not strings are passed through untouched.

        :param input_value: Value to convert.
        :type input_value: str
        :param converter: Converter function to use.
        :type converter: callable
        :return: Dictionary with the converted value, or None if conversion failed.
        :rtype: dict or None
        """
        if not isinstance(input_value, str):
            return {'value': input_value}
        try:
            cleaned_value = converter(input_value)
        except ValueError:
            return None
        if cleaned_value.get('value', None) is None:
            return None
        return cleaned_value
class WinParser(KeyValueParser):
    """
    Parses wannier90 input files.

    This parser extracts keywords, blocks, and comments from a Wannier90 ``.win`` input file.
    Comments start with ``!`` or ``#`` and run to the end of the line.
    """

    # ``begin <name>`` ... ``end <name>`` sections; the content may span several lines.
    block = re.compile(r'begin (?P<name>\w*)\s*\n\s*(?P<content>[\w\W]*)\s*\n\s*end \1')
    # A comment plus, if present, the newline terminating it. Only the text is captured.
    comment = re.compile(r'([!#].*)\n?')

    def __init__(self, path: str) -> None:
        """
        Initialize the parser and parse the Wannier90 input file.

        After construction ``keywords``, ``blocks`` and ``comments`` hold the three parts
        returned by :meth:`parse_win`, and ``result`` holds keywords and blocks merged.

        :param path: Path to the Wannier90 .win file.
        :type path: str
        """
        super().__init__()
        self.result = {}
        with open(path, 'r', encoding='utf8') as winf:
            # Called through ``self`` so that subclasses overriding the patterns are honoured.
            self.keywords, self.blocks, self.comments = self.parse_win(winf)
        self.result.update(self.keywords)
        self.result.update(self.blocks)

    @classmethod
    def parse_win(cls, fobj_or_str: str | Any) -> tuple[dict[str, Any], dict[str, list[str]], list[str]]:
        """
        Parse a Wannier90 input file or string.

        Comments are collected and removed first, then ``begin``/``end`` blocks are extracted
        and removed, and whatever remains is read as keyword assignments.

        :param fobj_or_str: File object or string containing the Wannier90 input.
        :type fobj_or_str: file or str
        :return: Tuple of (keywords dict, blocks dict, comments list). Keyword values are
            stripped strings; each block maps to the list of its stripped lines.
        :rtype: tuple
        """
        raw = fobj_or_str if isinstance(fobj_or_str, str) else fobj_or_str.read()
        comments = re.findall(cls.comment, raw)

        def _drop_comment(match: re.Match) -> str:
            """Remove a comment, keeping the line break when it trails other content."""
            line_start = raw.rfind('\n', 0, match.start()) + 1
            is_inline = raw[line_start:match.start()].strip() != ''
            # Dropping the newline after an inline comment would glue the next line onto
            # this one, corrupting keyword values and the ``end`` line of blocks.
            if is_inline and match.group(0).endswith('\n'):
                return '\n'
            return ''

        content = re.sub(cls.comment, _drop_comment, raw)
        blocks = re.findall(cls.block, content)
        content = re.sub(cls.block, '', content)
        kvd = {key: value.strip() for key, value in re.findall(cls.assignment, content)}
        # Lines inside a block are kept whole; they are not split into individual items.
        bld = {name: [line.strip() for line in body.split('\n')] for name, body in blocks}
        return kvd, bld, comments