Patching Dec 9, 2021, 6-7 a.m. CST: all GitLab services may be unavailable for 5-10 minutes.

Commit afa9600e authored by Matthew Krafczyk
Browse files

Add default behavior of no grouping if no group specs are given

Also add tests for this behavior
parent ad94d02a
......@@ -243,9 +243,12 @@ def sequence_df(df, lags, group_specs):
for col_name in integer_columns.index:
temp_df.loc[:,col_name] = temp_df.loc[:,col_name].astype(pd.Int64Dtype())
# Produce Groupby
# Groupby
temp_gbydf = temp_df.groupby(by=by, level=level, axis=0)
if (by is None) and (level is None):
# No need to group the DataFrame here.
temp_gbydf = temp_df
else:
# Produce Groupby DataFrame
temp_gbydf = temp_df.groupby(by=by, level=level, axis=0)
dfs = []
......
......@@ -603,3 +603,145 @@ def test_large_1_6():
target_df.columns = pd.MultiIndex.from_tuples(list(target_df.columns))
pd.testing.assert_frame_equal(sequenced_df, target_df)
def test_nogrouping_1():
    """Check ``sequence_df`` with lags ``[-1, 0]`` and an empty group-spec list.

    The input is indexed by dates that contain gaps and then restart at
    2020-01-01, so the index is neither regular nor unique. The expected
    frame pairs every row's value with the value of the row directly before
    it; the first input row has no predecessor and is absent from the result.
    """
    dates = [
        '2020-01-01', '2020-01-02', '2020-01-04', '2020-01-05', '2020-01-06',
        '2020-01-07', '2020-01-08', '2020-01-10', '2020-01-11', '2020-01-13',
        '2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05',
    ]
    values = [
        0.2, -0.1, 0.345, 0.55, 1.2,
        -2.5, 3.7, 3.5, 0.3, -1.,
        -2.1, 1.1, 1.123, 5.3, 5.55,
    ]
    lag_columns = [('Value', -1), ('Value', 0)]

    # Input frame: one 'Value' column indexed by the parsed dates.
    source = pd.DataFrame(
        [[date, value] for date, value in zip(dates, values)],
        columns=['Date', 'Value'],
    )
    source = source.assign(Date=pd.to_datetime(source['Date'])).set_index('Date')

    result = pds.sequence_df(source, [-1, 0], [])

    # Expected frame: (previous value, current value) for rows 1..end.
    expected_rows = [
        [date, previous, current]
        for date, previous, current in zip(dates[1:], values, values[1:])
    ]
    expected = pd.DataFrame(expected_rows, columns=['Date', *lag_columns])
    expected = expected.assign(Date=pd.to_datetime(expected['Date'])).set_index('Date')
    expected.columns = pd.MultiIndex.from_tuples(lag_columns)

    pd.testing.assert_frame_equal(result, expected)
def test_nogrouping_2():
    """Check ``sequence_df`` with lags ``[-1, 0]`` on an hourly index, no groups.

    The input holds fifteen consecutive hourly timestamps on 2020-01-01
    (00:00:00 through 14:00:00). The expected frame pairs every row's value
    with the value of the row directly before it; the first input row has no
    predecessor and is absent from the result.
    """
    timestamps = [f'2020-01-01 {hour:02d}:00:00' for hour in range(15)]
    values = [
        0.2, -0.1, 0.345, 0.55, 1.2,
        -2.5, 3.7, 3.5, 0.3, -1.,
        -2.1, 1.1, 1.123, 5.3, 5.55,
    ]
    lag_columns = [('Value', -1), ('Value', 0)]

    # Input frame: one 'Value' column indexed by the parsed timestamps.
    source = pd.DataFrame(
        [[stamp, value] for stamp, value in zip(timestamps, values)],
        columns=['Date', 'Value'],
    )
    source = source.assign(Date=pd.to_datetime(source['Date'])).set_index('Date')

    result = pds.sequence_df(source, [-1, 0], [])

    # Expected frame: (previous value, current value) for rows 1..end.
    expected_rows = [
        [stamp, previous, current]
        for stamp, previous, current in zip(timestamps[1:], values, values[1:])
    ]
    expected = pd.DataFrame(expected_rows, columns=['Date', *lag_columns])
    expected = expected.assign(Date=pd.to_datetime(expected['Date'])).set_index('Date')
    expected.columns = pd.MultiIndex.from_tuples(lag_columns)

    pd.testing.assert_frame_equal(result, expected)
def test_nogrouping_3():
    """Check ``sequence_df`` with lags ``[-1, 0]`` on a string index, no groups.

    The index holds repeated labels ('A' through 'E'), but the group-spec
    list passed to ``sequence_df`` is empty. The expected frame therefore
    pairs every row's value with the value of the row directly before it,
    regardless of label; the first input row is absent from the result.
    """
    labels = [
        'A', 'A', 'A', 'A', 'B',
        'B', 'B', 'B', 'B', 'C',
        'D', 'E', 'A', 'A', 'A',
    ]
    values = [
        0.2, -0.1, 0.345, 0.55, 1.2,
        -2.5, 3.7, 3.5, 0.3, -1.,
        -2.1, 1.1, 1.123, 5.3, 5.55,
    ]
    lag_columns = [('Value', -1), ('Value', 0)]

    # Input frame: one 'Value' column indexed by the (non-unique) labels.
    source = pd.DataFrame(
        [[label, value] for label, value in zip(labels, values)],
        columns=['Group', 'Value'],
    ).set_index('Group')

    result = pds.sequence_df(source, [-1, 0], [])

    # Expected frame: (previous value, current value) for rows 1..end.
    expected_rows = [
        [label, previous, current]
        for label, previous, current in zip(labels[1:], values, values[1:])
    ]
    expected = pd.DataFrame(
        expected_rows,
        columns=['Group', *lag_columns],
    ).set_index('Group')
    expected.columns = pd.MultiIndex.from_tuples(lag_columns)

    pd.testing.assert_frame_equal(result, expected)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment