# Source code for resqpy.olio.wellspec_keywords

"""Module for loading WELLSPEC files. 

The module includes a dictionary of nexus WELLSPEC column keywords, functionality to
read WELLSPEC files and transform the well data into Pandas DataFrames.
"""

# Nexus is a registered trademark of the Halliburton Company

import logging

log = logging.getLogger(__name__)

import numpy as np
import pandas as pd
import datetime
from typing import Any, Dict, Tuple, Type, Optional, List, Union, TextIO

import resqpy.olio.keyword_files as kf

# usage levels for Nexus WELLSPEC columns, as required by pagoda (higher value means more strongly wanted)
(wk_unknown, wk_banned, wk_preferably_not, wk_okay, wk_preferred, wk_required) = (-3, -2, -1, 0, 1, 2)

# wellspec_dict maps each WELLSPEC column keyword to a 5-tuple:
#     (warn count, required in, required out, default, length units boolean)
# where the 'required' elements hold one of the wk_* usage levels above and the final boolean
# flags keywords whose values are lengths (see length_unit_conversion_applicable())
#
# NB: changing entries in this dictionary will usually require other code change elsewhere
# the 'required out' level should be >= the 'required in' level (as is the case for every entry below)
# yapf: disable
wellspec_dict: Dict[str, Tuple[int, int, int, Any, bool]] = {
    "IW":       (0, wk_required,       wk_required, None,  False),
    "JW":       (0, wk_required,       wk_required, None,  False),
    "L":        (0, wk_required,       wk_required, None,  False),
    "GRID":     (0, wk_preferred,      wk_required, None,  False),  # or use main grid name as default
    "RADW":     (0, wk_preferred,      wk_required, 0.25,  True),   # use pagoda spec value for i.p. perf
    "KHMULT":   (0, wk_okay,           wk_okay,     1.0,   False),  # or use 'NA' as default?
    "STAT":     (0, wk_okay,           wk_okay,     "ON",  False),
    "ANGLA":    (0, wk_preferred,      wk_required, 0.0,   False),
    "ANGLV":    (0, wk_preferred,      wk_required, 0.0,   False),  # default for other perfs (vertical)
    "LENGTH":   (0, wk_okay,           wk_okay,     None,  True),   # derive default from cell size
    "KH":       (0, wk_okay,           wk_okay,     None,  True),   # alternative to LENGTH (one required)
    "SKIN":     (0, wk_okay,           wk_okay,     0.0,   False),
    "PPERF":    (0, wk_okay,           wk_okay,     1.0,   False),
    "ANGLE":    (0, wk_okay,           wk_okay,     360.0, False),
    "IRELPM":   (0, wk_okay,           wk_okay,     None,  False),  # default fracture IRELPM for i.p. perf
    "RADB":     (0, wk_okay,           wk_okay,     None,  True),   # calculate from cell size & k_align, k_v
    "WI":       (0, wk_okay,           wk_okay,     None,  False),  # calculate from radb, radw & skin
    "K":        (0, wk_preferably_not, wk_okay,     None,  False),  # derive from conductivity for i.p. perf?
    "LAYER":    (0, wk_preferably_not, wk_okay,     None,  False),  # use LGR i.p. layer for i.p. perf
    "DEPTH":    (0, wk_okay,           wk_okay,     "#",   True),   # '#' causes nexus to use cell depth
    "X":        (0, wk_okay,           wk_okay,     None,  True),   # use cell X for i.p. perf
    "Y":        (0, wk_okay,           wk_okay,     None,  True),   # use cell Y for i.p. perf
    "CELL":     (0, wk_banned,         wk_banned,   None,  False),  # CELL is for unstructured grids
    "DTOP":     (0, wk_banned,         wk_banned,   None,  True),   # not compatible with ANGLA, ANGLV
    "DBOT":     (0, wk_banned,         wk_banned,   None,  True),   # not compatible with ANGLA, ANGLV
    "RADBP":    (0, wk_preferably_not, wk_okay,     None,  True),   # calculate as for RADB
    "RADWP":    (0, wk_preferably_not, wk_okay,     None,  True),   # use pagoda wellbore radius
    "PORTYPE":  (0, wk_banned,         wk_banned,   None,  False),  # dual porosity: todo: need to check values
    "FM":       (0, wk_preferably_not, wk_okay,     0.0,   False),  # dual porosity: connection to fracture?
    "MD":       (0, wk_preferably_not, wk_okay,     "NA",  False),
    "PARENT":   (0, wk_preferably_not, wk_okay,     "NA",  False),
    "MDCON":    (0, wk_preferably_not, wk_okay,     "NA",  False),
    "SECT":     (0, wk_preferably_not, wk_okay,     1,     False),
    "FLOWSECT": (0, wk_preferably_not, wk_okay,     1,     False),
    "ZONE":     (0, wk_preferably_not, wk_okay,     1,     False),
    "GROUP":    (0, wk_preferably_not, wk_okay,     1,     False),
    "TEMP":     (0, wk_preferably_not, wk_okay,     "NA",  False),
    "IPTN":     (0, wk_preferably_not, wk_okay,     1,     False),  # pattern
    "D":        (0, wk_preferably_not, wk_okay,     "NA",  False),  # non D'Arcy flow
    "ND":       (0, wk_preferably_not, wk_okay,     "NA",  False),  # non D'Arcy flow
    "DZ":       (0, wk_preferably_not, wk_okay,     None,  True),   # non D'Arcy flow; use LENGTH value? or DZ
}

# wellspec_dtype maps each WELLSPEC column keyword to the python type used when converting its values
wellspec_dtype: Dict[str, Type] = {
    "IW":       int,
    "JW":       int,
    "L":        int,
    "GRID":     str,
    "RADW":     float,
    "KHMULT":   float,
    "STAT":     str,
    "ANGLA":    float,
    "ANGLV":    float,
    "LENGTH":   float,
    "KH":       float,
    "SKIN":     float,
    "PPERF":    float,
    "ANGLE":    float,
    "IRELPM":   int,
    "RADB":     float,
    "WI":       float,
    "K":        float,
    "LAYER":    int,
    "DEPTH":    float,  # '#' causes nexus to use cell depth
    "X":        float,  # use cell X for i.p. perf
    "Y":        float,  # use cell Y for i.p. perf
    "CELL":     int,    # CELL is for unstructured grids
    "DTOP":     float,  # not compatible with ANGLA, ANGLV
    "DBOT":     float,  # not compatible with ANGLA, ANGLV
    "RADBP":    float,  # calculate as for RADB
    "RADWP":    float,
    "PORTYPE":  str,    # dual porosity: todo: need to check type
    "FM":       float,
    "MD":       float,
    "PARENT":   str,
    "MDCON":    float,
    "SECT":     str,    # todo: need to check type
    "FLOWSECT": str,    # todo: need to check type
    "ZONE":     int,
    "GROUP":    str,
    "TEMP":     float,
    "IPTN":     int,
    "D":        float,
    "ND":       str,
    "DZ":       float,
}
# yapf: enable


def increment_complaints(keyword):  # pragma: no cover
    """Adds one to the count of complaints (warnings) held for the keyword in the wellspec dictionary.

    The keyword is matched case-insensitively and must already be present in the dictionary.
    """
    key = keyword.upper()
    assert key in wellspec_dict
    warn_count, required_in, required_out, default, length_units = wellspec_dict[key]
    # entries are tuples (immutable), so the whole entry is replaced with the count bumped
    wellspec_dict[key] = (warn_count + 1, required_in, required_out, default, length_units)
def known_keyword(keyword):  # pragma: no cover
    """Returns True if the keyword (matched case-insensitively) has an entry in the wellspec dictionary."""
    key = keyword.upper()
    return key in wellspec_dict
def add_unknown_keyword(keyword):  # pragma: no cover
    """Registers a previously unseen keyword in the wellspec dictionary, flagged as unknown.

    The new entry is stored under the upper case form of the keyword, with usage levels of
    wk_unknown (in) and wk_banned (out), no default value and no length units.
    """
    assert not known_keyword(keyword)
    # the warn count starts at 1: assumes warning or error already given
    unknown_entry = (1, wk_unknown, wk_banned, None, False)
    wellspec_dict[keyword.upper()] = unknown_entry
def default_value(keyword):  # pragma: no cover
    """Returns the default value for the keyword.

    The keyword is matched case-insensitively, consistent with known_keyword(); it must be
    present in the wellspec dictionary.
    """
    assert known_keyword(keyword)
    # look up with the upper case form: dictionary keys are upper case and the assertion
    # above accepts any case, so indexing with the raw keyword could raise a KeyError
    return wellspec_dict[keyword.upper()][3]
def complaints(keyword):  # pragma: no cover
    """Returns the number of complaints (warnings) logged for the keyword.

    The keyword is matched case-insensitively, consistent with known_keyword(); it must be
    present in the wellspec dictionary.
    """
    assert known_keyword(keyword)
    # look up with the upper case form: dictionary keys are upper case and the assertion
    # above accepts any case, so indexing with the raw keyword could raise a KeyError
    return wellspec_dict[keyword.upper()][0]
def check_value(keyword, value):
    """Returns True if the value is acceptable for the keyword.

    The keyword is matched case-insensitively. False is returned for an unknown keyword, for a
    value outside the range accepted for the keyword, and for a value which cannot be converted
    to the numeric type needed for the check. Known keywords without a specific check are accepted.
    """
    try:
        key = keyword.upper()
        if not known_keyword(key):
            return False
        # strictly positive integers
        if key in ("IW", "JW", "L", "LAYER", "IRELPM", "CELL", "SECT", "FLOWSECT", "ZONE", "IPTN"):
            return int(value) > 0
        # non-empty grid name
        if key == "GRID":
            return len(str(value)) > 0
        if key == "STAT":
            return str(value).upper() in ("ON", "OFF")
        # angles, in degrees
        if key == "ANGLA":
            return -360.0 <= float(value) <= 360.0
        if key == "ANGLV":
            return 0.0 <= float(value) <= 180.0
        # radii must be strictly positive
        if key in ("RADW", "RADB", "RADWP", "RADBP"):
            return float(value) > 0.0
        # non-negative quantities
        if key in ("WI", "LENGTH", "KH", "KHMULT", "K", "DZ"):
            return float(value) >= 0.0
        # fraction in the closed range zero to one
        if key == "PPERF":
            return 0.0 <= float(value) <= 1.0
        if key == "ANGLE":
            return 0.0 <= float(value) <= 360.0
        # any value convertible to float is acceptable
        if key in ("SKIN", "DEPTH", "X", "Y", "TEMP"):
            float(value)
            return True
        return True  # pragma: no cover
    except Exception:
        # a failed conversion (or a keyword which is not a string) means the value is not acceptable
        return False
def required_out_list():  # pragma: no cover
    """Returns a list of keywords that are required.

    A keyword is included when its 'required out' level (third element of its wellspec
    dictionary entry) is wk_required; the list follows the dictionary's ordering.
    """
    # a comprehension avoids binding a local variable named 'list', which shadowed the builtin
    return [key for key, entry in wellspec_dict.items() if entry[2] == wk_required]
def length_unit_conversion_applicable(keyword):  # pragma: no cover
    """Returns True if the keyword has a quantity class of length.

    The keyword is matched case-insensitively, consistent with known_keyword(); it must be
    present in the wellspec dictionary.
    """
    assert known_keyword(keyword)
    # look up with the upper case form: dictionary keys are upper case and the assertion
    # above accepts any case, so indexing with the raw keyword could raise a KeyError
    return wellspec_dict[keyword.upper()][4]
def load_wellspecs(wellspec_file: str,
                   well: Optional[str] = None,
                   column_list: Union[List[str], None] = [],
                   keep_duplicate_cells: bool = False,
                   keep_null_columns: bool = True,
                   last_data_only: bool = True,
                   usa_date_format: bool = False,
                   return_dates_list: bool = False):
    """Reads the Nexus wellspec file returning a dictionary of well name to pandas dataframe.

    arguments:
        wellspec_file (str): file path of ascii input file containing wellspec keywords.
        well (str, optional): if present, only the data for the named well are loaded. If None, data
            for all wells are loaded. Ignored when column_list is None.
        column_list (List[str]/None): if present, each dataframe returned contains these columns, in
            this order. If None, the resulting dictionary contains only well names as keys (each
            mapping to None rather than a dataframe). If an empty list (default), each dataframe
            contains the columns listed in the corresponding wellspec header, in the order found in
            the file.
        keep_duplicate_cells (bool): if True, duplicate cells are kept, otherwise (default) only the
            last entry is kept.
        keep_null_columns (bool): if True (default), columns that contain all NA values are kept,
            otherwise they are removed.
        last_data_only (bool): if True (default), only the last entry of well data in the file is
            used in the dataframe, otherwise all of the well data are used at different times.
        usa_date_format (bool): if True, wellspec file is expected to contain date formats in
            MM/DD/YYYY; if False (default), DD/MM/YYYY.
        return_dates_list (bool, default False): if True, a sorted list of unique dates present in
            the wellspec file is also returned, with dates in iso format.

    returns:
        well_dict (Dict[str, Union[pd.DataFrame, None]]): mapping each well name found in the
        wellspec file to a dataframe containing the wellspec data; wells for which all the data
        are NA are omitted
        or
        (well_dict, dates_list): where dates list is a sorted list of all dates present in the
        wellspec file (including those not relevant to a specific well), in iso format

    raises:
        KeyError: if a well name is given but no WELLSPEC entry for that well exists in the file.

    note:
        if return_dates_list is True, the returned list always contains all dates from the
        wellspec file that applied to any entry, regardless of the well and last_data_only
        arguments; the dates list will not include a null entry, even if there are wellspec
        data before the first timestamp
    """
    assert wellspec_file, "No wellspec file specified."
    if column_list is not None:
        for column in column_list:
            assert (column.upper() in wellspec_dict), f"Unrecognized wellspec column name {column}."
    # an empty column list means 'use whatever columns each wellspec header declares'
    selecting = bool(column_list)

    well_pointers = get_well_pointers(wellspec_file, usa_date_format = usa_date_format)
    dates_list = _prepare_dates_list(well_pointers) if return_dates_list else None

    if column_list is None:
        # only the well names are wanted: no table data need be read
        well_dict = dict.fromkeys(well_pointers, None)
    else:
        if well:
            if well not in well_pointers:
                raise KeyError(f"Well {well} not found in wellspec file {wellspec_file}.")
            wells_to_load = {well: well_pointers[well]}
        else:
            wells_to_load = well_pointers
        well_dict = {}
        with open(wellspec_file, "r") as file:
            for well_name, pointers in wells_to_load.items():
                well_data = get_all_well_data(
                    file,
                    well_name,
                    pointers,
                    column_list,
                    selecting,
                    keep_duplicate_cells,
                    keep_null_columns,
                    last_data_only,
                )
                if well_data is not None:
                    well_dict[well_name] = well_data

    if return_dates_list:
        return (well_dict, dates_list)
    return well_dict
def _prepare_dates_list(well_pointers): dates = [] for entries in well_pointers.values(): for _, date in entries: if date and date not in dates: dates.append(date) dates.sort() return dates
def get_well_pointers(
    wellspec_file: str,
    usa_date_format: bool = False,
    no_date_replacement: Optional[datetime.date] = None,
) -> Dict[str, List[Tuple[int, Union[None, str]]]]:
    """Gets the file locations of each well in the wellspec file for optimised processing of the data.

    arguments:
        wellspec_file (str): file path of ascii input file containing wellspec keywords.
        usa_date_format (bool): if True, the date taken from the wellspec file is in the format
            MM/DD/YYYY, otherwise it is in the format DD/MM/YYYY.
        no_date_replacement (datetime.date, optional): if there is no date given for a well, this
            date is used.

    returns:
        well_pointers (Dict[str, List[Tuple[int, None/str]]]): mapping each well name found in the
        wellspec file to a list of their file locations and dates (iso format) as tuples. If there
        is no TIME before the well data in the file, the date is None (or the iso format of
        no_date_replacement when that is given).

    raises:
        FileNotFoundError: if the wellspec file cannot be found.
        ValueError: if a date following a TIME keyword does not match the expected format, or is
            earlier than no_date_replacement.
    """
    # first pass: record the file position immediately after each WELLSPEC keyword line
    well_pointers: Dict[str, List[Tuple[int, Union[None, str]]]] = {}
    try:
        with open(wellspec_file, "r") as file:
            while True:
                found = kf.find_keyword(file, "WELLSPEC")
                if not found:
                    break
                line = file.readline()
                words = line.split()
                assert len(words) >= 2, "Missing well name after WELLSPEC keyword."
                well_name = words[1]
                well_pointers.setdefault(well_name, []).append((file.tell(), None))
    except FileNotFoundError:
        raise FileNotFoundError(f"The file {wellspec_file} can't be found.")

    # second pass: record the file position immediately after each TIME keyword line, with its date
    date_format = "%m/%d/%Y" if usa_date_format else "%d/%m/%Y"
    time_pointers = {}
    with open(wellspec_file, "r") as file:
        while True:
            found = kf.find_keyword(file, "TIME")
            if not found:
                break
            line = file.readline()
            words = line.split()
            assert len(words) >= 2, "Missing date after TIME keyword."
            date = words[1]
            if '(' in date:
                # sometimes user specifies (HH:MM:SS) along with date - can separate time from date with this check
                date = date.split('(')[0]
            try:
                date_obj = datetime.datetime.strptime(date, date_format).date()
            except ValueError:
                raise ValueError(f"The date found '{date}' does not match the correct format (usa_date_format "
                                 f"is {usa_date_format}).")
            if no_date_replacement is not None and date_obj < no_date_replacement:
                raise ValueError(
                    f"The Zero Date {no_date_replacement} must be before the first wellspec TIME {date_obj}.")
            time_pointers[file.tell()] = date_obj.isoformat()

    # date applying to any wellspec entry located before the first TIME keyword
    initial_date = no_date_replacement.isoformat() if no_date_replacement is not None else None

    for well_pointer_list in well_pointers.values():
        for i, (position, _) in enumerate(well_pointer_list):
            # restart from the initial date for every entry, otherwise an entry before the first
            # TIME would inherit the date left over from the previously processed entry
            current_date = initial_date
            # time_pointers is in ascending file position order (order of discovery), so the
            # applicable date is that of the last TIME located before this entry
            for time_pointer, date in time_pointers.items():
                if position > time_pointer:
                    current_date = date
                else:
                    break
            well_pointer_list[i] = (position, current_date)

    return well_pointers
def get_well_data(
    file: TextIO,
    well_name: str,
    pointer: int,
    column_list: List[str] = [],
    selecting: bool = False,
    keep_duplicate_cells: bool = True,
    keep_null_columns: bool = True,
    date: Optional[str] = None,
) -> Union[pd.DataFrame, None]:
    """Creates a dataframe of the well data for a given well name and at a specific time in the wellspec file.

    The pointer argument is used to go to the file location where the well dataset is located.

    arguments:
        file (TextIO): the opened wellspec file object.
        well_name (str): name of the well (only used in log and error messages).
        pointer (int): the file object's start position of the well data, as returned by the
            file object's tell() method.
        column_list (List[str]): when selecting is True, the dataframe returned contains these
            columns, in this order; a column missing from the wellspec header is filled with
            'ROOT' for GRID and with NA otherwise. When selecting is False the list is ignored and
            the dataframe contains the columns listed in the wellspec header, in the order found
            in the file.
        selecting (bool): True if the column_list contains at least one column name, False
            otherwise (default).
        keep_duplicate_cells (bool): if True (default), duplicate cells are kept, otherwise only
            the last entry is kept.
        keep_null_columns (bool): if True (default), columns that contain all NA values are kept,
            otherwise they are removed.
        date (str, optional): the well date which is provided by the get_well_pointers function
            along with the well pointers (only used in the log message for null data).

    returns:
        Pandas dataframe of the well data or None if all the data are NA.

    note:
        values of 'NA' are stored as NaN, '#' is stored as is, and anything else is converted
        with the type given by wellspec_dtype; a STAT column is converted to 1 for 'ON' and 0
        for any other value
    """
    file.seek(pointer)
    kf.skip_blank_lines_and_comments(file)
    line = kf.strip_trailing_comment(file.readline()).upper()
    columns_present = line.split()
    if selecting:
        # for each requested column: its position in the header, or -1 if the header lacks it
        column_map = [
            columns_present.index(column.upper()) if column.upper() in columns_present else -1
            for column in column_list
        ]
        df_col = column_list
    else:
        df_col = columns_present
    data: Dict[str, List] = {col: [] for col in df_col}
    all_null = True

    while True:
        kf.skip_comments(file)
        # any of these keywords marks the end of the current wellspec table
        if any(kf.specific_keyword_next(file, keyword) for keyword in ("WELLSPEC", "WELLMOD", "TIME", "INCLUDE")):
            break
        line = kf.strip_trailing_comment(file.readline())
        words = line.split()
        if len(words) == 0:
            break  # end of file
        assert len(words) >= len(columns_present), f"Insufficient data in line of wellspec table {well_name} [{line}]."
        if selecting:
            for col, source_index in zip(column_list, column_map):
                if source_index < 0:
                    # requested column is absent from this wellspec header
                    if col.upper() == "GRID":
                        data[col].append("ROOT")
                    else:
                        data[col].append(np.nan)
                else:
                    value = words[source_index]
                    if value == "NA":
                        data[col].append(np.nan)
                    elif value == "#":
                        data[col].append(value)
                    elif value:
                        data[col].append(wellspec_dtype[col.upper()](value))
                if not pd.isnull(data[col][-1]):
                    all_null = False
        else:
            # surplus words beyond the declared columns are ignored
            for col, value in zip(columns_present, words[:len(columns_present)]):
                if value == "NA":
                    data[col].append(np.nan)
                elif value == "#":
                    data[col].append(value)
                elif value:
                    data[col].append(wellspec_dtype[col](value))
                if not pd.isnull(data[col][-1]):
                    all_null = False

    if all_null:
        date_note = f" at date {date}" if date is not None else ""
        log.warning(f"Null wellspec data for well {well_name}{date_note}.")
        return None

    df = pd.DataFrame(data)
    if not keep_null_columns:
        df.drop(columns = df.columns[df.isna().all()], inplace = True)
    if not keep_duplicate_cells and any(df.duplicated(subset = ["IW", "JW", "L"])):
        log.warning(f"There are duplicate cells for well {well_name}.")
        df.drop_duplicates(subset = ["IW", "JW", "L"], keep = "last", inplace = True)

    if "STAT" in df.columns:
        # represent the status as an integer flag: 1 for 'ON', 0 for anything else
        df["STAT"] = df.apply(lambda row: 1 if row["STAT"] == "ON" else 0, axis = 1)

    return df
def get_all_well_data(
    file: TextIO,
    well_name: str,
    pointers: List[Tuple[int, Union[None, str]]],
    column_list: List[str] = [],
    selecting: bool = False,
    keep_duplicate_cells: bool = False,
    keep_null_columns: bool = True,
    last_data_only: bool = True,
) -> Union[pd.DataFrame, None]:
    """Creates a dataframe of all the well data for a given well name in the wellspec file.

    This differs from the get_well_data function in that here multiple datasets for a well are
    combined into a single dataframe if they exist.

    arguments:
        file (TextIO): the opened wellspec file object.
        well_name (str): name of the well.
        pointers (List[Tuple[int, None/str]]): a list of the file object's start position of the
            well data and the well's date, as returned by get_well_pointers() for the well. If no
            date existed before the well in the file, the date will be None.
        column_list (List[str]): when selecting is True, each dataframe contains these columns, in
            this order; otherwise the columns listed in the corresponding wellspec header are
            used, in the order found in the file. If None and last_data_only is True, None is
            returned.
        selecting (bool): True if the column_list contains at least one column name, False
            otherwise (default).
        keep_duplicate_cells (bool): if True, duplicate cells are kept, otherwise (default) only
            the last entry is kept (within each dataset).
        keep_null_columns (bool): if True (default), columns that contain all NA values are kept,
            otherwise they are removed.
        last_data_only (bool): if True (default), only the last entry of well data in the file is
            used in the dataframe, otherwise all of the well data are used at different times,
            with a DATE column added holding the date of each dataset.

    returns:
        Pandas dataframe of all well data for a specific well name or None if all the data are NA
        (or if the pointers list is empty).
    """
    if last_data_only:
        # an empty pointers list yields None, as it does when combining all datasets below
        if column_list is None or not pointers:
            return None
        last_pointer, last_date = pointers[-1]
        return get_well_data(
            file,
            well_name,
            last_pointer,
            column_list,
            selecting,
            keep_duplicate_cells,
            keep_null_columns,
            date = last_date,
        )

    df_list = []
    for pointer, date in pointers:
        # keep_null_columns is left at its default (True) here: all-NA columns are only
        # removed from the combined dataframe, further down
        df = get_well_data(
            file,
            well_name,
            pointer,
            column_list,
            selecting,
            keep_duplicate_cells,
            date = date,
        )
        if df is None:
            continue
        df["DATE"] = date
        df_list.append(df)
    if not df_list:
        return None
    df_combined = pd.concat(df_list, ignore_index = True)
    if not keep_null_columns:
        df_combined.drop(columns = df_combined.columns[df_combined.isna().all()], inplace = True)
    return df_combined