Skip to content

PyTables

Bases: PyObjects

Groups together multiple tables.

You can interact with PyTables straight from the model, and you can narrow the selection down with .find().

Source code in pytabular/table.py
class PyTables(PyObjects):
    """Groups together multiple tables.

    You can interact with `PyTables` straight from model.
    You can even filter down with `.find()`.
    """

    def __init__(self, objects) -> None:
        """Init just extends from the main `PyObjects` class."""
        super().__init__(objects)

    def refresh(self, *args, **kwargs):
        """Refreshes all `PyTable`(s) in class.

        Every positional and keyword argument is forwarded, after `self`,
        to the `refresh` method of the model the first table belongs to.
        """
        model = self._objects[0].Model
        return model.refresh(self, *args, **kwargs)

    def query_all(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame:
        """Dynamically query all tables.

        Every `_` in `query_function` is replaced with the quoted name of
        each table, and one `ROW` per table is combined into a single query.
        Note that *every* underscore is replaced, so `query_function` cannot
        reference names that contain an underscore themselves.

        Args:
            query_function (str, optional): DAX expression to evaluate per
                table, with `_` as the table placeholder.
                Defaults to 'COUNTROWS(_)'.

        Returns:
            pd.DataFrame: Result of the query, with the columns `[Table]` and
                `[<query_function>]`. An empty `PyTables` returns an empty
                dataframe with those two columns without querying the model.

        Example:
            ```python
            model.Tables.find('fact').query_all() # (1)
            ```

            1. Because `.find()` will return the `PyObjects` you are searching in,
            another `PyTables` is returned, but reduced to just
            the `PyTable`(s) with the 'fact' in the name. Then will
            get the # of rows for each table.
        """
        logger.info("Querying every table in PyTables...")
        logger.debug(f"Function to be run: {query_function}")
        logger.debug("Dynamically creating DAX query...")
        if len(self) == 0:
            # Nothing to query and no model to reach; mirror the result shape.
            return pd.DataFrame(columns=["[Table]", f"[{query_function}]"])

        # DAX escapes a quote by doubling it, both in string literals ("")
        # and in quoted table identifiers ('').
        column_name = query_function.replace('"', '""')
        rows = []
        for table in self:
            table_name = table.get_Name()
            dax_table_identifier = "'" + table_name.replace("'", "''") + "'"
            table_literal = table_name.replace('"', '""')
            table_expression = query_function.replace("_", dax_table_identifier)
            rows.append(
                f'ROW("Table","{table_literal}","{column_name}",{table_expression})'
            )

        if len(rows) == 1:
            # DAX `UNION` needs at least two table arguments.
            query_str = f"EVALUATE {rows[0]}"
        else:
            query_str = "EVALUATE UNION(\n" + ",\n".join(rows) + ")"
        return self[0].Model.query(query_str)

    def find_zero_rows(self) -> "PyTables":
        """Returns PyTables class of tables with zero rows queried.

        Runs `query_all` with `COUNTROWS(_)` and keeps the tables whose
        count comes back as a missing value.

        Returns:
            PyTables: A subset of the `PyTables` that contains zero rows.
        """
        query_function: str = "COUNTROWS(_)"
        df = self.query_all(query_function)

        table_names = df[df[f"[{query_function}]"].isna()]["[Table]"].to_list()
        logger.debug(f"Found {table_names}")
        tables = [self[name] for name in table_names]
        return self.__class__(tables)

    def last_refresh(self, group_partition: bool = True) -> pd.DataFrame:
        """Returns `pd.DataFrame` of tables with their latest refresh time.

        One row is collected per partition of every table. By default the
        rows are then reduced to the maximum `RefreshedTime` per table.

        Args:
            group_partition (bool, optional): Whether or not you want
                the grain of the dataframe to be by table or by partition.
                Defaults to True.

        Returns:
            pd.DataFrame: With `group_partition=True`, the columns `Tables`
                and `RefreshedTime` (maximum per table). Otherwise the columns
                `Tables`, `Partitions` and `RefreshedTime`, one row per
                partition.
        """
        data = {
            "Tables": [
                partition.Table.Name for table in self for partition in table.Partitions
            ],
            "Partitions": [
                partition.Name for table in self for partition in table.Partitions
            ],
            "RefreshedTime": [
                partition.last_refresh()
                for table in self
                for partition in table.Partitions
            ],
        }
        df = pd.DataFrame(data)
        if group_partition:
            logger.debug("Grouping together to grain of Table")
            return (
                df[["Tables", "RefreshedTime"]]
                .groupby(by=["Tables"])
                .max()
                .reset_index(drop=False)
            )
        logger.debug("Returning DF")
        return df

__init__(objects)

Init just extends from the main PyObjects class.

Source code in pytabular/table.py
def __init__(self, objects) -> None:
    """Init just extends from the main `PyObjects` class.

    Args:
        objects: Passed unchanged to the parent class initializer.
    """
    super().__init__(objects)

refresh(*args, **kwargs)

Refreshes all PyTable(s) in class.

Source code in pytabular/table.py
def refresh(self, *args, **kwargs):
    """Refresh every `PyTable` held in this collection.

    The model is taken from the first table, and its `refresh` method is
    called with this collection followed by the given arguments.

    Returns:
        Whatever the model's `refresh` call returns.
    """
    return self._objects[0].Model.refresh(self, *args, **kwargs)

query_all(query_function='COUNTROWS(_)')

Dynamically query all tables.

Every _ in the query_function arg is replaced with each table's name to build out the query to run.

Parameters:

Name Type Description Default
query_function str

DAX expression to run against each table; the full query is built dynamically with the UNION & ROW DAX functions. Defaults to 'COUNTROWS(_)'.

'COUNTROWS(_)'

Returns:

Type Description
DataFrame

pd.DataFrame: Returns dataframe with results

Example
model.Tables.find('fact').query_all() # (1)
  1. Because .find() will return the PyObjects you are searching in, another PyTables is returned, but reduced to just the PyTable(s) with the 'fact' in the name. Then will get the # of rows for each table.
Source code in pytabular/table.py
def query_all(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame:
    """Dynamically query all tables.

    Every `_` in `query_function` is replaced with the quoted name of
    each table, and one `ROW` per table is combined into a single query.
    Note that *every* underscore is replaced, so `query_function` cannot
    reference names that contain an underscore themselves.

    Args:
        query_function (str, optional): DAX expression to evaluate per
            table, with `_` as the table placeholder.
            Defaults to 'COUNTROWS(_)'.

    Returns:
        pd.DataFrame: Result of the query, with the columns `[Table]` and
            `[<query_function>]`. An empty `PyTables` returns an empty
            dataframe with those two columns without querying the model.

    Example:
        ```python
        model.Tables.find('fact').query_all() # (1)
        ```

        1. Because `.find()` will return the `PyObjects` you are searching in,
        another `PyTables` is returned, but reduced to just
        the `PyTable`(s) with the 'fact' in the name. Then will
        get the # of rows for each table.
    """
    logger.info("Querying every table in PyTables...")
    logger.debug(f"Function to be run: {query_function}")
    logger.debug("Dynamically creating DAX query...")
    if len(self) == 0:
        # Nothing to query and no model to reach; mirror the result shape.
        return pd.DataFrame(columns=["[Table]", f"[{query_function}]"])

    # DAX escapes a quote by doubling it, both in string literals ("")
    # and in quoted table identifiers ('').
    column_name = query_function.replace('"', '""')
    rows = []
    for table in self:
        table_name = table.get_Name()
        dax_table_identifier = "'" + table_name.replace("'", "''") + "'"
        table_literal = table_name.replace('"', '""')
        table_expression = query_function.replace("_", dax_table_identifier)
        rows.append(
            f'ROW("Table","{table_literal}","{column_name}",{table_expression})'
        )

    if len(rows) == 1:
        # DAX `UNION` needs at least two table arguments.
        query_str = f"EVALUATE {rows[0]}"
    else:
        query_str = "EVALUATE UNION(\n" + ",\n".join(rows) + ")"
    return self[0].Model.query(query_str)

find_zero_rows()

Returns PyTables class of tables with zero rows queried.

Returns:

Name Type Description
PyTables PyTables

A subset of the PyTables that contains zero rows.

Source code in pytabular/table.py
def find_zero_rows(self) -> "PyTables":
    """Return the tables whose row count query comes back empty.

    Runs `query_all` with `COUNTROWS(_)`, keeps the rows whose count is a
    missing value, and looks each of those table names up in this collection.

    Returns:
        PyTables: A subset of the `PyTables` that contains zero rows.
    """
    query_function: str = "COUNTROWS(_)"
    counts = self.query_all(query_function)

    count_column = f"[{query_function}]"
    has_no_rows = counts[count_column].isna()
    table_names = counts.loc[has_no_rows, "[Table]"].to_list()
    logger.debug(f"Found {table_names}")
    return self.__class__([self[name] for name in table_names])

last_refresh(group_partition=True)

Returns pd.DataFrame of tables with their latest refresh time.

Optional 'group_partition' variable, default is True. If False, an extra column will be included so that the last refresh time is at the grain of the partition. Example to add to model: model.Create_Table(p.Table_Last_Refresh_Times(model),'RefreshTimes').

Parameters:

Name Type Description Default
group_partition bool

Whether or not you want the grain of the dataframe to be by table or by partition. Defaults to True.

True

Returns:

Type Description
DataFrame

pd.DataFrame: pd dataframe with the RefreshedTime property. If group_partition == True and the table has multiple partitions, the result is df.groupby(by=["Tables"]).max()

Source code in pytabular/table.py
def last_refresh(self, group_partition: bool = True) -> pd.DataFrame:
    """Report when each table (or each partition) was last refreshed.

    One row is collected per partition of every table in this collection,
    holding the table name, the partition name and the value of the
    partition's `last_refresh()`.

    Args:
        group_partition (bool, optional): When True the rows are reduced to
            the maximum `RefreshedTime` per table. When False the
            per-partition rows are returned as they are. Defaults to True.

    Returns:
        pd.DataFrame: Columns `Tables` and `RefreshedTime` when grouped,
            otherwise `Tables`, `Partitions` and `RefreshedTime`.
    """
    partitions = [partition for table in self for partition in table.Partitions]
    df = pd.DataFrame(
        {
            "Tables": [partition.Table.Name for partition in partitions],
            "Partitions": [partition.Name for partition in partitions],
            "RefreshedTime": [partition.last_refresh() for partition in partitions],
        }
    )
    if not group_partition:
        logger.debug("Returning DF")
        return df

    logger.debug("Grouping together to grain of Table")
    latest_per_table = df[["Tables", "RefreshedTime"]].groupby(by=["Tables"]).max()
    return latest_per_table.reset_index(drop=False)

The main parent class for grouping your (Tables, Columns, Measures, Partitions, etc.).

Notice the magic methods. __rich_repr__() starts the baseline for displaying your model. It uses the amazing rich python package and builds your display from the self._display. Still building out the magic methods to give PyObjects more flexibility.

Source code in pytabular/object.py
class PyObjects:
    """The main parent class for grouping your (Tables, Columns, Measures, Partitions, etc.).

    Notice the magic methods. `__rich_repr__()` starts the baseline for displaying your model.
    It uses the amazing `rich` python package and
    builds your display from the `self._display`.
    Still building out the magic methods to give `PyObjects` more flexibility.
    """

    def __init__(self, objects: list[PyObject], parent=None) -> None:
        """Initialization of `PyObjects`.

        Takes the objects in something that is iterable.
        Then will build a default `rich` table display.

        Args:
            objects(list[PyObject]): .Net objects.
            parent: Parent Object. Defaults to `None`.
        """
        self._objects = objects
        self.parent = parent
        self._display = Table(title=str(self.__class__.mro()[0]))
        for index, obj in enumerate(self._objects):
            self._display.add_row(str(index), obj.Name)

    def __rich_repr__(self) -> None:
        """See [Rich Repr](https://rich.readthedocs.io/en/stable/pretty.html#rich-repr-protocol).

        Prints `self._display` to a new `Console`; nothing is returned.
        """
        Console().print(self._display)

    def __getitem__(self, object):
        """Get item from `PyObjects`.

        A `str` returns the last object whose `Name` equals it and raises
        `IndexError` when no name matches. A `slice` returns a new
        collection of the same class. Anything else is used directly as an
        index into `self._objects`.
        """
        if isinstance(object, str):
            matches = [item for item in self._objects if object == item.Name]
            return matches[-1]
        if isinstance(object, slice):
            return type(self)(self._objects[object])
        return self._objects[object]

    def __iter__(self):
        """Iterate through `PyObjects`."""
        yield from self._objects

    def __len__(self) -> int:
        """Get length of `PyObjects`.

        Returns:
            int: Number of PyObject in PyObjects
        """
        return len(self._objects)

    def __iadd__(self, obj):
        """Add a `PyObject` or `PyObjects` to your current `PyObjects` class.

        This is useful for building out a custom `PyObjects` class to work with.
        An iterable contributes its `_objects` when it has them and otherwise
        its own items; anything else is appended as a single object.
        The current `parent` is kept.
        """
        if isinstance(obj, Iterable):
            # Plain iterables (e.g. a list) have no `_objects` of their own.
            self._objects += getattr(obj, "_objects", obj)
        else:
            self._objects += [obj]

        # `__init__` rebuilds the display but also resets `parent`, so the
        # current parent is saved first and put back afterwards.
        parent = getattr(self, "parent", None)
        self.__init__(self._objects)
        self.parent = parent
        return self

    def _first_visible_object(self):
        """Gets the first object where `IsHidden is False`, or `None` if there is none."""
        for candidate in self:
            if candidate.IsHidden is False:
                return candidate
        return None

    def find(self, object_str: str):
        """Finds any or all `PyObject` inside of `PyObjects` that match the `object_str`.

        It is case insensitive, and matches when `object_str` is contained
        anywhere in the object's `Name`.

        Args:
            object_str (str): str to lookup in `PyObjects`

        Returns:
            PyObjects (object.PyObjects): Returns a new collection of the same
                class with all `PyObject` where the `PyObject.Name`
                contains `object_str`.
        """
        items = [
            candidate
            for candidate in self._objects
            if object_str.lower() in candidate.Name.lower()
        ]
        return type(self)(items)

    def get(self, object_str: str, alt_result: str = "") -> str:
        """Gets the object based on str.

        If the object isnt found, then an alternate result
        can be supplied as an argument. Any exception raised by the lookup
        is printed to a `Console` instead of being propagated.

        Args:
            object_str (str): str to lookup object
            alt_result (str): str to return when value isn't found.

        Returns:
            str: Result of the lookup, or the alternate result.
        """
        try:
            return self.__getitem__(object_str)
        except Exception as e:
            Console().print(e)

        return alt_result

find(object_str)

Finds any or all PyObject inside of PyObjects that match the object_str.

It is case insensitive.

Parameters:

Name Type Description Default
object_str str

str to lookup in PyObjects

required

Returns:

Name Type Description
PyObjects PyObjects

Returns a PyObjects class with all PyObject where the PyObject.Name matches object_str.

Source code in pytabular/object.py
def find(self, object_str: str):
    """Collect every `PyObject` whose name contains `object_str`.

    The comparison ignores case: both `object_str` and each object's
    `Name` are lower-cased before the containment check.

    Args:
        object_str (str): Text to look for in the names held by `PyObjects`.

    Returns:
        PyObjects (object.PyObjects): A new collection of the same class as
            this one, holding only the matching `PyObject`(s).
    """
    matches = []
    for candidate in self._objects:
        if object_str.lower() in candidate.Name.lower():
            matches.append(candidate)
    collection_type = self.__class__
    return collection_type(matches)