Source code for bamt.preprocess.numpy_pandas

# currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# parentdir = os.path.dirname(currentdir)
# sys.path.insert(0,parentdir)
import numpy as np
import pandas as pd


[docs] def loc_to_DataFrame(data: np.array): """Function to convert array to DataFrame Args: data (np.array): input array Returns: data (pd.DataFrame): with string columns for filtering """ nodes_type = get_type_numpy(data) if data.T.ndim == 1: data = data.T nodes_type = {0: nodes_type[0]} dtype = { key: "int64" if value == "disc" else "float64" for key, value in nodes_type.items() if value in ["disc", "cont"] } df = pd.DataFrame(data).astype(dtype) df.columns = df.columns.map(str) return df
[docs] def get_type_numpy(data: np.array): """Function to define the type of the columns of array disc - discrete node cont - continuous Args: data (np.array): input array Returns: dict: output dictionary where 'key' - node name and 'value' - node type Notes: -- You may have problems with confusing rows and columns """ arr = data.T column_type = {} for i, row in enumerate(arr): if row.ndim == 0 or row.T.ndim == 0: row_is_integer = np.issubdtype(row, np.integer) or row.is_integer() column_type[i] = "disc" if row_is_integer else "cont" else: all_row_is_integer = all( np.issubdtype(x, np.integer) or x.is_integer() for x in row ) column_type[i] = "disc" if all_row_is_integer else "cont" if column_type[i] not in ["disc", "cont"]: print("get_type_numpy: Incorrect type of row") print(row) return column_type