Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Matthew Krafczyk
pandas_sequence
Commits
c00423df
Commit
c00423df
authored
Feb 11, 2021
by
Matthew Krafczyk
Browse files
Change fundamental functioning of sequence_df. Add multitude of tests
parent
796d28ff
Changes
2
Hide whitespace changes
Inline
Side-by-side
pandas_sequence/sequence.py
View file @
c00423df
...
...
@@ -100,52 +100,7 @@ def contiguous_group_indices(df, sequence_index_level=None, sequence_col=None, s
return
grp_ids
def sequence_plain_df(df, num_before, num_after, inc_val=True):
    """
    Sequence feature data into multi-component rows.

    This function takes a dataframe containing various features over a sequence. The
    dataframe is assumed to be 'in-order', that is, each row is 1 'unit' away from its
    neighbouring rows.

    The input dataframe should have the following structure:

    Sequence | 'Feat 1' | 'Feat 2' |
    s1       | f1(s1)   | f2(s1)   |
    s2       | f1(s2)   | f2(s2)   |
    ...

    The function then returns for num_before=2, num_after=0, inc_val=True:

    Sequence | ('Feat 1', -2) | ('Feat 2', -2) | ('Feat 1', -1) | ('Feat 2', -1) | ('Feat 1', 0) | ('Feat 2', 0) |
    s3       | f1(s1)         | f2(s1)         | f1(s2)         | f2(s2)         | f1(s3)        | f2(s3)        |
    s4       | f1(s2)         | f2(s2)         | f1(s3)         | f2(s3)         | f1(s4)        | f2(s4)        |
    ...

    Parameters
    ----------
    df : pandas.DataFrame
        The in-order feature data. It is not modified by this function.
    num_before : int
        How many preceding rows to include (offsets -num_before .. -1).
    num_after : int
        How many following rows to include (offsets 1 .. num_after).
    inc_val : bool, default True
        Whether to include the row's own values (offset 0).

    Returns
    -------
    pandas.DataFrame
        A frame whose columns are a MultiIndex of (original column, offset). Any row
        containing a missing value is dropped, which removes the rows at either end
        that lack a full window as well as rows whose source data held NaNs.

    Notes
    -----
    If the requested window yields no segments at all (for example
    num_before=0, num_after=0, inc_val=False) the underlying ``pd.concat`` call
    receives an empty list and raises.
    """
    # Capture the original labels once; every segment is relabelled from these.
    base_columns = df.columns

    segments = []
    # Walk from the furthest-back offset to the furthest-forward one so that the
    # output columns are ordered by increasing offset.
    for shift_by in range(num_before, -num_after - 1, -1):
        if shift_by == 0:
            if not inc_val:
                continue
            # Relabel a copy: assigning to .columns on `df` itself would rewrite the
            # caller's column labels as a side effect.
            segment = df.copy()
        else:
            # shift() already returns a new frame, so relabelling it is safe.
            segment = df.shift(shift_by)

        # A positive shift pulls values from earlier rows, hence the negated label.
        segment.columns = pd.MultiIndex.from_product([base_columns, [-shift_by]])
        segments.append(segment)

    # Join segments into the full dataframe and drop incomplete windows.
    return pd.concat(segments, axis=1, join='outer').dropna()
def
sequence_df
(
df
,
lags
,
group_index_level
=
None
,
group_col
=
None
,
sequence_index_level
=
None
,
sequence_col
=
None
,
sequence_function
=
None
):
def
sequence_df
(
df
,
lags
,
group_specs
):
"""
Sequence feature data into multi-component rows.
...
...
@@ -182,57 +137,142 @@ def sequence_df(df, lags,
A pandas dataframe containing rows of prediction and/or label data.
"""
# group_index_level=None, group_col=None,
# sequence_index_level=None, sequence_col=None,
# sequence_function=None):
print
(
"sequence_df start"
)
if
group_index_level
is
None
and
group_col
is
None
:
# Compute Group ids
G_ids
=
contiguous_group_indices
(
df
,
sequence_index_level
=
sequence_index_level
,
sequence_col
=
sequence_col
,
sequence_function
=
sequence_function
)
else
:
if
type
(
group_col
)
is
pd
.
core
.
series
.
Series
:
G_ids
=
group_col
elif
group_index_level
is
not
None
:
if
group_index_level
in
df
.
index
.
names
:
g_idx
=
df
.
index
.
names
.
index
(
group_index_level
)
elif
type
(
group_index_level
)
is
int
:
g_idx
=
group_index_level
# Build group sequences
# Group spec:
# ('group', 'level', 'level_name')
# ('group', 'column', 'column_name')
# ('sequence', 'level', 'level_name', <sequence_function>)
# ('sequence', 'column', 'column_name', <sequence_function>)
by
=
[]
level
=
[]
remove_columns
=
[]
for
spec
in
group_specs
:
if
len
(
spec
)
<
2
:
raise
ValueError
(
"Group specs must contain at least three elements"
)
if
spec
[
0
]
==
'group'
:
# These are group type specs
if
spec
[
1
]
==
'level'
:
if
spec
[
2
]
not
in
df
.
index
.
names
:
raise
ValueError
(
f
"Level name
{
spec
[
2
]
}
not found in index!"
)
level
.
append
(
spec
[
2
])
elif
spec
[
1
]
==
'column'
:
if
type
(
spec
[
2
])
is
pd
.
core
.
series
.
Series
:
by
.
append
(
spec
[
2
])
else
:
if
spec
[
2
]
not
in
df
.
columns
:
raise
ValueError
(
f
"Column name
{
spec
[
2
]
}
not found!"
)
by
.
append
(
df
[
spec
[
2
]])
remove_columns
.
append
(
spec
[
2
])
else
:
raise
ValueError
(
f
"group_index_level of type
{
type
(
group_index_level
)
}
not supported"
)
G_ids
=
df
.
index
.
to_series
().
apply
(
lambda
i
:
i
[
g_idx
])
raise
ValueError
(
f
"Group subtype
{
spec
[
1
]
}
not supported"
)
elif
spec
[
0
]
==
'sequence'
:
# These are the sequence type specs
if
spec
[
1
]
==
'level'
:
if
spec
[
2
]
not
in
df
.
index
.
names
:
raise
ValueError
(
f
"Level name
{
spec
[
2
]
}
not found in index!"
)
if
len
(
spec
)
==
4
:
if
not
callable
(
spec
[
3
]):
raise
ValueError
(
f
"The fourth element of a group spec must be a callable!"
)
g_ids
=
contiguous_group_indices
(
df
,
sequence_index_level
=
spec
[
2
],
sequence_function
=
spec
[
3
])
else
:
g_ids
=
contiguous_group_indices
(
df
,
sequence_index_level
=
spec
[
2
])
by
.
append
(
g_ids
)
elif
spec
[
1
]
==
'column'
:
if
type
(
spec
[
2
])
is
pd
.
core
.
series
.
Series
:
if
len
(
spec
)
==
4
:
if
not
callable
(
spec
[
3
]):
raise
ValueError
(
f
"The fourth element of a group spec must be a callable!"
)
g_ids
=
contiguous_group_indices
(
df
,
sequence_col
=
spec
[
2
],
sequence_function
=
spec
[
3
])
else
:
g_ids
=
contiguous_group_indices
(
df
,
sequence_col
=
spec
[
2
])
else
:
if
spec
[
2
]
not
in
df
.
columns
:
raise
ValueError
(
f
"Column name
{
spec
[
2
]
}
not found!"
)
remove_columns
.
append
(
spec
[
2
])
if
len
(
spec
)
==
4
:
if
not
callable
(
spec
[
3
]):
raise
ValueError
(
f
"The fourth element of a group spec must be a callable!"
)
g_ids
=
contiguous_group_indices
(
df
,
sequence_col
=
spec
[
2
],
sequence_function
=
spec
[
3
])
else
:
g_ids
=
contiguous_group_indices
(
df
,
sequence_col
=
spec
[
2
])
by
.
append
(
g_ids
)
elif
spec
[
1
]
==
'index'
:
if
len
(
spec
)
==
3
:
if
not
callable
(
spec
[
2
]):
raise
ValueError
(
f
"The third element of an index group spec must be a callable!"
)
g_ids
=
contiguous_group_indices
(
df
,
sequence_function
=
spec
[
2
])
else
:
g_ids
=
contiguous_group_indices
(
df
)
by
.
append
(
g_ids
)
else
:
raise
ValueError
(
f
"Group subtype
{
spec
[
1
]
}
not supported"
)
else
:
G_ids
=
df
[
group_col
]
raise
ValueError
(
f
"Group spec of type
{
spec
[
0
]
}
not supported"
)
print
(
"Contiguous Ids"
)
print
(
G_ids
)
# Remove columns
temp_df
=
df
.
loc
[:,
list
(
filter
(
lambda
c
:
c
not
in
remove_columns
,
df
.
columns
))]
if
sequence_col
is
not
None
and
type
(
sequence_col
)
is
not
pd
.
core
.
series
.
Series
:
# If sequence_col specifies a column of the dataFrame, we remove this column from the sequencing.
temp_df
=
df
.
loc
[:,
list
(
filter
(
lambda
c
:
c
!=
sequence_col
,
df
.
columns
))]
elif
group_col
is
not
None
and
type
(
group_col
)
is
not
pd
.
core
.
series
.
Series
:
# If group_col specifies a column of the dataFrame, we remove this column from the sequencing.
temp_df
=
df
.
loc
[:,
list
(
filter
(
lambda
c
:
c
!=
group_col
,
df
.
columns
))]
else
:
temp_df
=
df
print
(
"temp_df:"
)
print
(
"---temp_df---"
)
print
(
temp_df
)
print
(
type
(
temp_df
))
print
(
temp_df
.
dtypes
)
print
(
"---adjusting by and level---"
)
print
(
"level"
)
print
(
level
)
print
(
"by"
)
print
(
by
)
if
len
(
level
)
==
0
:
level
=
None
if
len
(
by
)
==
0
:
by
=
None
# There's a bug where if both by and level are passed to groupby, it throws an error:
# TypeError: 'numpy.ndarray' object is not callable
# We need to mitigate this by detecting if both by and level are non-zero and if so, transition the level values
# to columns and add them to by.
if
level
is
not
None
and
by
is
not
None
:
for
lvl
in
level
:
lvl_idx
=
df
.
index
.
names
.
index
(
lvl
)
lvl_vals
=
df
.
index
.
to_series
().
apply
(
lambda
t
:
t
[
lvl_idx
])
by
.
append
(
lvl_vals
)
level
=
None
print
(
"---after adjustment---"
)
print
(
"level:"
)
print
(
level
)
print
(
"by:"
)
print
(
by
)
# Change column types to support nans
integer_columns
=
temp_df
.
dtypes
[
temp_df
.
dtypes
.
apply
(
pd
.
api
.
types
.
is_integer_dtype
)]
for
col_name
in
integer_columns
.
index
:
print
(
f
"1: col_name:
{
col_name
}
"
)
temp_df
.
loc
[:,
col_name
]
=
temp_df
.
loc
[:,
col_name
].
astype
(
pd
.
Int64Dtype
())
print
(
"temp_df after integer column change"
)
print
(
"
---
temp_df after integer column change
---
"
)
print
(
temp_df
)
print
(
temp_df
.
dtypes
)
print
(
"group ids"
)
print
(
G_ids
)
# Produce Groupby
# Groupby
temp_gbydf
=
temp_df
.
groupby
(
G_ids
)
print
(
"---before groupby---"
)
print
(
f
"by"
)
print
(
by
)
print
(
type
(
by
))
print
(
f
"level:"
)
print
(
level
)
temp_gbydf
=
temp_df
.
groupby
(
by
=
by
,
level
=
level
,
axis
=
0
)
print
(
"---after groupby---"
)
dfs
=
[]
...
...
tests/test_functionality.py
View file @
c00423df
import
pandas
as
pd
import
pandas_sequence
as
pds
import
numpy
as
np
def
test_basic_group_col_1_1
():
# Build test DataFrame
...
...
@@ -16,7 +17,7 @@ def test_basic_group_col_1_1():
index
=
[
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
]
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
group
_col
=
'Group'
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
[(
'
group
'
,
'column'
,
'Group'
)
])
target_df
=
pd
.
DataFrame
(
[[
1
,
2
],
...
...
@@ -27,7 +28,7 @@ def test_basic_group_col_1_1():
columns
=
pd
.
MultiIndex
.
from_product
([[
'Value'
],[
-
1
,
0
]]),
index
=
[
1
,
2
,
3
,
5
,
6
])
assert
sequenced_df
.
equals
(
target_df
)
pd
.
testing
.
assert_frame_equal
(
sequenced_df
,
target_df
)
def
test_basic_group_col_1_2
():
# Build test DataFrame
...
...
@@ -44,7 +45,7 @@ def test_basic_group_col_1_2():
index
=
[
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
]
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
group
_col
=
'Group'
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
[(
'
group
'
,
'column'
,
'Group'
)
])
target_df
=
pd
.
DataFrame
(
[[
1
,
2
],
...
...
@@ -55,7 +56,7 @@ def test_basic_group_col_1_2():
columns
=
pd
.
MultiIndex
.
from_product
([[
'Value'
],[
-
1
,
0
]]),
index
=
[
1
,
2
,
3
,
5
,
6
])
assert
sequenced_df
.
equals
(
target_df
)
pd
.
testing
.
assert_frame_equal
(
sequenced_df
,
target_df
)
def
test_basic_group_col_1_3
():
# Build test DataFrame
...
...
@@ -73,7 +74,7 @@ def test_basic_group_col_1_3():
)
group_col
=
pd
.
Series
([
1
,
1
,
1
,
1
,
2
,
2
,
2
,
3
])
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
group
_col
=
group_col
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
[(
'
group
'
,
'column'
,
group_col
)
])
target_df
=
pd
.
DataFrame
(
[[
1
,
2
],
...
...
@@ -84,7 +85,7 @@ def test_basic_group_col_1_3():
columns
=
pd
.
MultiIndex
.
from_product
([[
'Value'
],[
-
1
,
0
]]),
index
=
[
1
,
2
,
3
,
5
,
6
])
assert
sequenced_df
.
equals
(
target_df
)
pd
.
testing
.
assert_frame_equal
(
sequenced_df
,
target_df
)
def
test_basic_1
():
# Build test DataFrame
...
...
@@ -101,7 +102,7 @@ def test_basic_1():
index
=
[
0
,
1
,
2
,
3
,
5
,
6
,
7
,
9
]
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
])
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
]
,
[(
'sequence'
,
'index'
)]
)
target_df
=
pd
.
DataFrame
(
[[
1
,
2
],
...
...
@@ -112,7 +113,7 @@ def test_basic_1():
columns
=
pd
.
MultiIndex
.
from_product
([[
'Value'
],[
-
1
,
0
]]),
index
=
[
1
,
2
,
3
,
6
,
7
])
assert
sequenced_df
.
equals
(
target_df
)
pd
.
testing
.
assert_frame_equal
(
sequenced_df
,
target_df
)
def
test_basic_group_index_1
():
# Build test DataFrame
...
...
@@ -138,7 +139,7 @@ def test_basic_group_index_1():
names
=
[
'Group'
,
'count'
])
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
group
_index_
level
=
'Group'
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
[(
'
group
'
,
'
level
'
,
'Group'
)
])
target_df
=
pd
.
DataFrame
(
[[
1
,
2
],
...
...
@@ -155,7 +156,7 @@ def test_basic_group_index_1():
(
'B'
,
6
)],
names
=
[
'Group'
,
'count'
]))
assert
sequenced_df
.
equals
(
target_df
)
pd
.
testing
.
assert_frame_equal
(
sequenced_df
,
target_df
)
def
test_basic_sequence_index_1
():
# Build test DataFrame
...
...
@@ -181,7 +182,7 @@ def test_basic_sequence_index_1():
names
=
[
'Group'
,
'count'
])
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
sequence
_index_
level
=
'count'
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
[(
'
sequence
'
,
'
level
'
,
'count'
)
])
target_df
=
pd
.
DataFrame
(
[[
1
,
2
],
...
...
@@ -198,7 +199,7 @@ def test_basic_sequence_index_1():
(
'B'
,
2
)],
names
=
[
'Group'
,
'count'
]))
assert
sequenced_df
.
equals
(
target_df
)
pd
.
testing
.
assert_frame_equal
(
sequenced_df
,
target_df
)
def
test_basic_group_col_2_1
():
# Build test DataFrame
...
...
@@ -215,7 +216,7 @@ def test_basic_group_col_2_1():
index
=
[
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
]
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
group
_col
=
'Group'
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
[(
'
group
'
,
'column'
,
'Group'
)
])
target_df
=
pd
.
DataFrame
(
[[
1
,
2
],
...
...
@@ -226,7 +227,7 @@ def test_basic_group_col_2_1():
columns
=
pd
.
MultiIndex
.
from_product
([[
'Value'
],[
-
1
,
0
]]),
index
=
[
1
,
2
,
3
,
5
,
6
])
assert
sequenced_df
.
equals
(
target_df
)
pd
.
testing
.
assert_frame_equal
(
sequenced_df
,
target_df
)
def
test_basic_group_col_2_2
():
# Build test DataFrame
...
...
@@ -243,7 +244,7 @@ def test_basic_group_col_2_2():
index
=
[
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
]
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
group
_col
=
'Group'
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
[(
'
group
'
,
'column'
,
'Group'
)
])
target_df
=
pd
.
DataFrame
(
[[
1
,
2
],
...
...
@@ -254,7 +255,7 @@ def test_basic_group_col_2_2():
columns
=
pd
.
MultiIndex
.
from_product
([[
'Value'
],[
-
1
,
0
]]),
index
=
[
1
,
2
,
3
,
5
,
6
])
assert
sequenced_df
.
equals
(
target_df
)
pd
.
testing
.
assert_frame_equal
(
sequenced_df
,
target_df
)
def
test_basic_sequence_col_1_1
():
# Build test DataFrame
...
...
@@ -271,7 +272,7 @@ def test_basic_sequence_col_1_1():
index
=
[
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
]
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
sequence
_col
=
'Seq'
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
[(
'
sequence
'
,
'column'
,
'Seq'
)
])
target_df
=
pd
.
DataFrame
(
[[
1
,
2
],
...
...
@@ -282,7 +283,7 @@ def test_basic_sequence_col_1_1():
columns
=
pd
.
MultiIndex
.
from_product
([[
'Value'
],[
-
1
,
0
]]),
index
=
[
1
,
2
,
3
,
5
,
6
])
assert
sequenced_df
.
equals
(
target_df
)
pd
.
testing
.
assert_frame_equal
(
sequenced_df
,
target_df
)
def
test_basic_sequence_col_1_2
():
# Build test DataFrame
...
...
@@ -302,7 +303,7 @@ def test_basic_sequence_col_1_2():
def
str_diff
(
s
,
ref_val
):
return
ord
(
s
)
-
ord
(
ref_val
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
sequence
_col
=
'Seq'
,
sequence_function
=
str_diff
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
[(
'
sequence
'
,
'column'
,
'Seq'
,
str_diff
)
])
target_df
=
pd
.
DataFrame
(
[[
1
,
2
],
...
...
@@ -313,7 +314,7 @@ def test_basic_sequence_col_1_2():
columns
=
pd
.
MultiIndex
.
from_product
([[
'Value'
],[
-
1
,
0
]]),
index
=
[
1
,
2
,
3
,
5
,
6
])
assert
sequenced_df
.
equals
(
target_df
)
pd
.
testing
.
assert_frame_equal
(
sequenced_df
,
target_df
)
def
test_basic_sequence_col_1_3
():
# Build test DataFrame
...
...
@@ -335,7 +336,7 @@ def test_basic_sequence_col_1_3():
def
str_diff
(
s
,
ref_val
):
return
ord
(
s
)
-
ord
(
ref_val
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
sequence
_col
=
sequence_col
,
sequence_
function
=
str_diff
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
[(
'
sequence
'
,
'column'
,
sequence_
col
,
str_diff
)
])
target_df
=
pd
.
DataFrame
(
[[
1
,
2
],
...
...
@@ -346,32 +347,259 @@ def test_basic_sequence_col_1_3():
columns
=
pd
.
MultiIndex
.
from_product
([[
'Value'
],[
-
1
,
0
]]),
index
=
[
1
,
2
,
3
,
5
,
6
])
assert
sequenced_df
.
equals
(
target_df
)
pd
.
testing
.
assert_frame_equal
(
sequenced_df
,
target_df
)
def
test_large_
group_col_
1_1
():
def
test_large_1_1
():
# Build test DataFrame
df
=
pd
.
DataFrame
(
[[
1
,
1
],
[
1
,
2
],
[
1
,
3
],
[
1
,
4
],
[
2
,
1
],
[
2
,
2
],
[
2
,
3
],
[
3
,
1
]],
columns
=
[
'Group'
,
'Value'
],
index
=
[
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
]
[[
'A'
,
'2020-01-01'
,
0.2
],
[
'A'
,
'2020-01-02'
,
-
0.1
],
[
'A'
,
'2020-01-03'
,
0.345
],
[
'A'
,
'2020-01-04'
,
0.55
],
[
'A'
,
'2020-01-05'
,
1.2
],
[
'B'
,
'2020-01-01'
,
-
2.5
],
[
'B'
,
'2020-01-02'
,
3.7
],
[
'B'
,
'2020-01-03'
,
3.5
],
[
'B'
,
'2020-01-04'
,
0.3
],
[
'B'
,
'2020-01-05'
,
-
1.
],
[
'C'
,
'2020-01-01'
,
-
2.1
],
[
'C'
,
'2020-01-02'
,
1.1
],
[
'C'
,
'2020-01-03'
,
1.123
],
[
'C'
,
'2020-01-04'
,
5.3
],
[
'D'
,
'2020-01-01'
,
5.55
]],
columns
=
[
'Group'
,
'Date'
,
'Value'
],
)
df
=
df
.
set_index
([
'Group'
,
'Date'
])
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
group
_col
=
'Group'
)
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
1
,
0
],
[(
'
group
'
,
'level'
,
'Group'
)
])
target_df
=
pd
.
DataFrame
(
[[
1
,
2
],
[
2
,
3
],
[
3
,
4
],
[
1
,
2
],
[
2
,
3
]],
columns
=
pd
.
MultiIndex
.
from_product
([[
'Value'
],[
-
1
,
0
]]),
index
=
[
1
,
2
,
3
,
5
,
6
])
[[
'A'
,
'2020-01-02'
,
0.2
,
-
0.1
],
[
'A'
,
'2020-01-03'
,
-
0.1
,
0.345
],
[
'A'
,
'2020-01-04'
,
0.345
,
0.55
],
[
'A'
,
'2020-01-05'
,
0.55
,
1.2
],
[
'B'
,
'2020-01-02'
,
-
2.5
,
3.7
],
[
'B'
,
'2020-01-03'
,
3.7
,
3.5
],
[
'B'
,
'2020-01-04'
,
3.5
,
0.3
],
[
'B'
,
'2020-01-05'
,
0.3
,
-
1.
],
[
'C'
,
'2020-01-02'
,
-
2.1
,
1.1
],
[
'C'
,
'2020-01-03'
,
1.1
,
1.123
],
[
'C'
,
'2020-01-04'
,
1.123
,
5.3
]],
columns
=
[
'Group'
,
'Date'
,
(
'Value'
,
-
1
),
(
'Value'
,
0
)],
)
target_df
=
target_df
.
set_index
([
'Group'
,
'Date'
])
target_df
.
columns
=
pd
.
MultiIndex
.
from_tuples
(
list
(
target_df
.
columns
))
pd
.
testing
.
assert_frame_equal
(
sequenced_df
,
target_df
)
def test_large_1_2():
    """Sequence a (Group, Date)-indexed frame with lags [-2, 0, 1], grouped by the 'Group' level."""
    key_columns = ['Group', 'Date']

    # Input observations: groups A and B have five dates, C has four, D has one.
    observations = [
        ['A', '2020-01-01', 0.2],
        ['A', '2020-01-02', -0.1],
        ['A', '2020-01-03', 0.345],
        ['A', '2020-01-04', 0.55],
        ['A', '2020-01-05', 1.2],
        ['B', '2020-01-01', -2.5],
        ['B', '2020-01-02', 3.7],
        ['B', '2020-01-03', 3.5],
        ['B', '2020-01-04', 0.3],
        ['B', '2020-01-05', -1.],
        ['C', '2020-01-01', -2.1],
        ['C', '2020-01-02', 1.1],
        ['C', '2020-01-03', 1.123],
        ['C', '2020-01-04', 5.3],
        ['D', '2020-01-01', 5.55],
    ]
    df = pd.DataFrame(observations, columns=key_columns + ['Value'])
    df = df.set_index(key_columns)

    sequenced_df = pds.sequence_df(df, [-2, 0, 1], [('group', 'level', 'Group')])

    # Expected rows: value two steps back, current value, value one step ahead.
    expected_rows = [
        ['A', '2020-01-03', 0.2, 0.345, 0.55],
        ['A', '2020-01-04', -0.1, 0.55, 1.2],
        ['B', '2020-01-03', -2.5, 3.5, 0.3],
        ['B', '2020-01-04', 3.7, 0.3, -1.],
        ['C', '2020-01-03', -2.1, 1.123, 5.3],
    ]
    lag_labels = [('Value', -2), ('Value', 0), ('Value', 1)]
    target_df = pd.DataFrame(expected_rows, columns=key_columns + lag_labels)
    target_df = target_df.set_index(key_columns)
    # The remaining column labels are plain tuples; promote them to a MultiIndex.
    remaining_labels = list(target_df.columns)
    target_df.columns = pd.MultiIndex.from_tuples(remaining_labels)

    pd.testing.assert_frame_equal(sequenced_df, target_df)
def test_large_1_3():
    """Sequence a (Group, Date)-indexed frame with lags given as [0, -1], grouped by the 'Group' level."""
    key_columns = ['Group', 'Date']

    # Input observations: groups A and B have five dates, C has four, D has one.
    observations = [
        ['A', '2020-01-01', 0.2],
        ['A', '2020-01-02', -0.1],
        ['A', '2020-01-03', 0.345],
        ['A', '2020-01-04', 0.55],
        ['A', '2020-01-05', 1.2],
        ['B', '2020-01-01', -2.5],
        ['B', '2020-01-02', 3.7],
        ['B', '2020-01-03', 3.5],
        ['B', '2020-01-04', 0.3],
        ['B', '2020-01-05', -1.],
        ['C', '2020-01-01', -2.1],
        ['C', '2020-01-02', 1.1],
        ['C', '2020-01-03', 1.123],
        ['C', '2020-01-04', 5.3],
        ['D', '2020-01-01', 5.55],
    ]
    df = pd.DataFrame(observations, columns=key_columns + ['Value'])
    df = df.set_index(key_columns)

    sequenced_df = pds.sequence_df(df, [0, -1], [('group', 'level', 'Group')])

    # Expected rows: the current value comes first, then the value one step back,
    # mirroring the order in which the lags were requested.
    expected_rows = [
        ['A', '2020-01-02', -0.1, 0.2],
        ['A', '2020-01-03', 0.345, -0.1],
        ['A', '2020-01-04', 0.55, 0.345],
        ['A', '2020-01-05', 1.2, 0.55],
        ['B', '2020-01-02', 3.7, -2.5],
        ['B', '2020-01-03', 3.5, 3.7],
        ['B', '2020-01-04', 0.3, 3.5],
        ['B', '2020-01-05', -1., 0.3],
        ['C', '2020-01-02', 1.1, -2.1],
        ['C', '2020-01-03', 1.123, 1.1],
        ['C', '2020-01-04', 5.3, 1.123],
    ]
    lag_labels = [('Value', 0), ('Value', -1)]
    target_df = pd.DataFrame(expected_rows, columns=key_columns + lag_labels)
    target_df = target_df.set_index(key_columns)
    # The remaining column labels are plain tuples; promote them to a MultiIndex.
    remaining_labels = list(target_df.columns)
    target_df.columns = pd.MultiIndex.from_tuples(remaining_labels)

    pd.testing.assert_frame_equal(sequenced_df, target_df)
def
test_large_1_4
():
# Build test DataFrame
df
=
pd
.
DataFrame
(
[[
'A'
,
'2020-01-01'
,
0.2
],
[
'A'
,
'2020-01-02'
,
-
0.1
],
[
'A'
,
'2020-01-03'
,
0.345
],
[
'A'
,
'2020-01-04'
,
0.55
],
[
'A'
,
'2020-01-05'
,
1.2
],
[
'B'
,
'2020-01-01'
,
-
2.5
],
[
'B'
,
'2020-01-02'
,
3.7
],
[
'B'
,
'2020-01-03'
,
3.5
],
[
'B'
,
'2020-01-04'
,
0.3
],
[
'B'
,
'2020-01-05'
,
-
1.
],
[
'C'
,
'2020-01-01'
,
-
2.1
],
[
'C'
,
'2020-01-02'
,
1.1
],
[
'C'
,
'2020-01-03'
,
1.123
],
[
'C'
,
'2020-01-04'
,
5.3
],
[
'D'
,
'2020-01-01'
,
5.55
]],
columns
=
[
'Group'
,
'Date'
,
'Value'
],
)
df
=
df
.
set_index
([
'Group'
,
'Date'
])
sequenced_df
=
pds
.
sequence_df
(
df
,
[
-
2
,
-
1
,
0
],
[(
'group'
,
'level'
,
'Group'
)])
target_df
=
pd
.
DataFrame
(
[[
'A'
,
'2020-01-03'
,
0.2
,
-
0.1
,
0.345
],
[
'A'
,
'2020-01-04'
,
-
0.1
,
0.345
,
0.55
],
[
'A'
,
'2020-01-05'
,
0.345
,
0.55
,
1.2
],
[
'B'
,
'2020-01-03'
,
-
2.5
,
3.7
,
3.5
],
[
'B'
,
'2020-01-04'
,
3.7
,
3.5
,
0.3
],