Skip to content

local_independencies

Signature/Parameters

def local_independencies(self, data = None, alpha = 0.05, include_sep_cols = False)

List conditional independencies implied by the DAG, and test them if data is provided.

Parameters:

Name Type Description Default
data DataFrame or None

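Observational data used to perform local conditional independence tests through dagitty::localTests. When None (default), the data stored on the DAG (self.data) is used instead; if that is also None, the method enumerates the implied independencies analytically and runs no tests.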

None
alpha float

Significance level for converting quantile-based confidence bounds into standard errors. Only used when data is provided. Defaults to 0.05.

0.05
include_sep_cols bool

When True, return additional columns detailing the separated variables and conditioning sets. Defaults to False.

False

Returns:

Type Description
DataFrame

Tidy representation of the implied independencies. The result always includes columns term (formatted as "Y _||_ X | Z"), estimate, se, lo, hi, and pvalue. When include_sep_cols is True, columns var1, var2, and cond are also present.

Examples:

>>> G = DAG(graph="X -> Z -> Y")
>>> independencies = G.local_independencies(include_sep_cols=True)
>>> independencies.pull("term").to_list()
['Y _||_ X | Z']
Source code in causalinf/gcm.py
def local_independencies(self, data=None, alpha=0.05, include_sep_cols=False):
    """
    List conditional independencies implied by the DAG, and test them if data is available.

    Parameters
    ----------
    data : tidypolars4sci.DataFrame or None, optional
        Observational data used to perform local conditional independence
        tests through ``dagitty::localTests``. When ``None`` (default),
        ``self.data`` is used instead; if that is also ``None``, the method
        only enumerates the implied independencies and runs no tests.
    alpha : float, optional
        Significance level for converting quantile-based confidence bounds
        into standard errors. Only used when data is available. Defaults
        to 0.05.
    include_sep_cols : bool, optional
        When ``True``, return additional columns detailing the separated
        variables and conditioning sets. Defaults to ``False``.

    Returns
    -------
    tidypolars4sci.DataFrame
        Tidy representation of the implied independencies. The result
        always includes columns ``term`` (formatted as ``"Y _||_ X | Z"``),
        ``estimate``, ``se``, ``lo``, ``hi``, and ``pvalue``; the last five
        are filled with nulls when no data is available to test against.
        When ``include_sep_cols`` is ``True``, columns ``var1``, ``var2``,
        and ``cond`` are also present.

    Examples
    --------
    >>> G = DAG(graph="X -> Z -> Y")
    >>> independencies = G.local_independencies(include_sep_cols=True)
    >>> independencies.pull("term").to_list()
    ['Y _||_ X | Z']
    """
    stat_cols = ['estimate', 'se', 'lo', 'hi', 'pvalue']
    sep_cols = ['var1', 'var2', 'cond']

    # Fall back to the data attached to the DAG when none is passed in.
    if data is None:
        data = self.data

    if data is None:
        # No data anywhere: enumerate the implied independencies only.
        implied = dagitty.impliedConditionalIndependencies(self.__dagitty__)
        # Collect every output column up front (statistics as nulls) so the
        # final select finds all documented columns, even with zero rows,
        # and the tibble is built once instead of row-by-row.
        columns = {name: [] for name in ['term'] + sep_cols + stat_cols}
        for ind in implied:
            y = ind[0][0]
            x = ind[1][0]
            z = ind[2]
            term = f"{y} _||_ {x}"
            if z:
                term = f"{term} | {', '.join(z)}"
            columns['term'].append(term)
            columns['var1'].append(y)
            columns['var2'].append(x)
            columns['cond'].append(z)
            for name in stat_cols:
                columns[name].append(None)
        inds = tp.tibble(columns)
    else:
        tested = dagitty.localTests(self.__dagitty__, data=convert().tp2tibble(data), abbreviate_names=False)
        # NOTE(review): the bounds are read from columns named '2.5%' and
        # '97.5%' while z is derived from alpha; for alpha != 0.05 these
        # presumably disagree -- confirm whether the confidence level should
        # be forwarded to localTests.
        z = dnorm.ppf(1 - alpha / 2)
        inds = (
            convert().rtibble2tp(tested, rownames2col='term')
            .rename({'p.value': "pvalue",
                     '2.5%': 'lo',
                     '97.5%': 'hi',
                     })
            .mutate(se=(tp.col('hi') - tp.col('lo')) / (2 * z))
        )
        if inds.nrow > 0:
            # NOTE(review): splitting on the bare '_||_' and '|' tokens
            # presumably leaves the surrounding spaces in var1/var2/cond,
            # and cond is a string here but the raw conditioning set in the
            # data-free branch -- confirm what callers expect.
            inds = (
                inds
                .separate('term', into=['var1', 'var2_cond'], sep='_||_', remove=False)
                .separate('var2_cond', into=['var2', 'cond'], sep='|')
            )
        elif include_sep_cols:
            # Nothing to split, but the select below still needs these
            # columns; with zero rows, copies of 'term' are empty placeholders.
            inds = inds.mutate(var1=tp.col('term'),
                               var2=tp.col('term'),
                               cond=tp.col('term'))

    wanted = ['term'] + stat_cols
    if include_sep_cols:
        wanted += sep_cols
    return inds.select(wanted)