Patching on Dec 9, 2021, 6-7 a.m. CST: all GitLab services may be unavailable for 5-10 minutes.

Commit c2a4e853 authored by Matthew Krafczyk
Browse files

Update contiguous_group_indices to take a series type as well

parent b0baaf20
import pandas as pd
def contiguous_group_indices(df, sequence_index_level=None, sequence_col=None, sequence_function=None):
def contiguous_group_indices(df_or_series, sequence_index_level=None, sequence_col=None, sequence_function=None):
"""
Produce series of continuous group labels for a given sequence column and sequencing function
......@@ -58,20 +58,25 @@ def contiguous_group_indices(df, sequence_index_level=None, sequence_col=None, s
# Fetch sequence series
sequence_series = None
if sequence_col is not None:
if type(sequence_col) is pd.core.series.Series:
# Use the sequence column if it's a Series
sequence_series = sequence_col
if type(df_or_series) is pd.DataFrame:
if sequence_col is not None:
sequence_series = df_or_series.loc[:,sequence_col]
elif sequence_index_level is not None:
if sequence_index_level not in df_or_series.index.names:
raise ValueError(f"sequence_index_level {sequence_index_level} not in the data frame. available levels: {df_or_series.index.names}")
level_idx = df_or_series.index.names.index(sequence_index_level)
sequence_series = df_or_series.index.to_series().apply(lambda t: t[level_idx])
else:
sequence_series = df.loc[:,sequence_col]
elif sequence_index_level is not None:
if sequence_index_level not in df.index.names:
raise ValueError(f"sequence_index_level {sequence_index_level} not in the data frame. available levels: {df.index.names}")
level_idx = df.index.names.index(sequence_index_level)
sequence_series = df.index.to_series().apply(lambda t: t[level_idx])
# Get the dataframe index
sequence_series = df_or_series.index.to_series()
elif type(df_or_series) is pd.core.series.Series:
sequence_series = df_or_series
else:
# Get the dataframe index
sequence_series = df.index.to_series()
try:
# Fallback assuming we might have an index. If we do this will succeed.
sequence_series = df_or_series.to_series()
except AttributeError:
raise TypeError(f"You must pass a pandas series, index, or dataframe as df_or_series. Got type {type(df_or_series)}")
# If the sequence function is None, set it as the simple difference formula
if sequence_function is None:
......@@ -186,9 +191,9 @@ def sequence_df(df, lags, group_specs):
if len(spec) == 4:
if not callable(spec[3]):
raise ValueError(f"The fourth element of a group spec must be a callable!")
g_ids = contiguous_group_indices(df, sequence_col=spec[2], sequence_function=spec[3])
g_ids = contiguous_group_indices(spec[2], sequence_function=spec[3])
else:
g_ids = contiguous_group_indices(df, sequence_col=spec[2])
g_ids = contiguous_group_indices(spec[2])
else:
if spec[2] not in df.columns:
raise ValueError(f"Column name {spec[2]} not found!")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment