"""Load and parse pedar plantar pressure data and construct a data node tree for further analysis.
Example
---
::
from pedarProbe import parse
condition_list = ['fast walking', 'slow walking', 'normal walking']
data = parse.trails_parse(
"data/subjects/walking plantar pressure time slot.xlsx", # the guiding sheet's path
condition_list, # condition list will be used for format checking
# max_read_rate=0.1,
)
"""
from __future__ import annotations
from typing import Type, Union
import re
import sys
import numpy as np
import pandas as pd
from pedarProbe.node import PedarNode, DataNode
class Pedar_asc(object):
    """Reader for :code:`.asc` file exported from pedar.

    Parameters
    ---
    path
        path of the :code:`.asc` file exported from pedar.
    skiprows
        number of rows to be skipped in file reading.
    header
        the index of row to be set as :attr:`self.doc`'s header.
    index_col
        the index of column to be set as the :attr:`self.doc`'s index.

    Note
    ---
    `Class Attributes`

    self.path :class:`str`
        path of the :code:`.asc` file exported from pedar.
    self.doc :class:`pandas.core.frame.DataFrame`
        loaded data frame, with sensor IDs as the columns (0~98 for left foot and 99 ~ 197 for the right foot) and time values as the rows.

        .. tip::
            There are two sensor ID numbering conventions. Please refer to :meth:`id_map` for more information.
    """

    def __init__(self, path: str, skiprows: int = 9, header: int = 9, index_col: int = 0):
        # columns are labelled with plain integers so that they can be
        # addressed with the IDs returned by id_map()
        names = list(range(199))

        self.path = path
        self.doc = pd.read_csv(
            self.path,
            delimiter='\t',
            skiprows=skiprows,
            header=header,
            names=names,
            index_col=index_col,
        )

        # column length check: only warns, the frame is kept either way
        # NOTE(review): index_col takes one of the 199 names, so confirm
        # against a real export that 199 data columns is the expected width.
        if self.doc.shape[1] != 199:
            print("\n{}'s dataframe has abnormal shape".format(path))

    def id_map(self, foot: str, sensor_id: int) -> int:
        """Maps sensor ID numbering from pedar convention to :mod:`pedarProbe` convention:

        - pedar convention: for each foot, sensors are numbered as 1~99.
        - :mod:`pedarProbe` convention: 0~98 for left foot sensors and 99 ~ 197 for the right foot sensors.

        Parameters
        ---
        foot
            :code:`'L'` as left foot and :code:`'R'` as right foot (lower case is accepted as well).
        sensor_id
            sensor ID in pedar convention.

        Return
        ---
        :class:`int`
            sensor ID in :mod:`pedarProbe` convention.

        Raises
        ---
        :class:`ValueError`
            if :attr:`foot` is none of :code:`'L'`, :code:`'l'`, :code:`'R'`, :code:`'r'`.
        """
        if foot in ('L', 'l'):
            # left foot sensor 1~99 map to column 0~98
            return sensor_id - 1
        if foot in ('R', 'r'):
            # right foot sensor 1~99 map to column 99~197
            return sensor_id + 98

        # Fail loudly: a None result would be used as a column label by the
        # getters below, and as a slice bound (None:None) it silently selects
        # every column of both feet.
        raise ValueError('invalid foot type when enquiry {}'.format(self.path))

    def get_time_sensor(self, foot: str, time: float, sensor_id: int) -> np.float64:
        """Get value with time and sensor ID.

        Parameters
        ---
        foot
            :code:`'L'` as left foot and :code:`'R'` as right foot.
        time
            time value.
        sensor_id
            sensor ID in pedar convention.

        Return
        ---
        :class:`numpy.float64`
        """
        return self.doc.loc[time, self.id_map(foot, sensor_id)]

    def get_time_seq(self, foot: str, time: float, start_sensor_id: int, end_sensor_id: int) -> pd.core.series.Series:
        """Get a sequence of values with time and start & end sensor IDs.

        Parameters
        ---
        foot
            :code:`'L'` as left foot and :code:`'R'` as right foot.
        time
            time value.
        start_sensor_id
            start sensor ID in pedar convention.
        end_sensor_id
            end sensor ID in pedar convention (label slicing, so it is included).

        Return
        ---
        :class:`pandas.core.series.Series`
        """
        first_col = self.id_map(foot, start_sensor_id)
        last_col = self.id_map(foot, end_sensor_id)
        return self.doc.loc[time, first_col:last_col]

    def get_sensor_seq(self, foot: str, sensor_id: int, start_time: float, end_time: float) -> pd.core.series.Series:
        """Get a sequence of values with sensor ID and start & end time.

        Parameters
        ---
        foot
            :code:`'L'` as left foot and :code:`'R'` as right foot.
        sensor_id
            sensor ID in pedar convention.
        start_time
            start time value.
        end_time
            end time value (label slicing, so it is included).

        Return
        ---
        :class:`pandas.core.series.Series`
        """
        return self.doc.loc[start_time:end_time, self.id_map(foot, sensor_id)]

    def get_time_sensor_slice(self, foot: str, start_time: float, end_time: float, start_sensor_id: int = 1, end_sensor_id: int = 99) -> pd.core.frame.DataFrame:
        """Get a frame of values with start & end sensor IDs and start & end time.

        Parameters
        ---
        foot
            :code:`'L'` as left foot and :code:`'R'` as right foot.
        start_time
            start time value.
        end_time
            end time value.
        start_sensor_id
            start sensor ID in pedar convention. Defaults to the first sensor of the foot.
        end_sensor_id
            end sensor ID in pedar convention. Defaults to the last sensor of the foot.

        Return
        ---
        :class:`pandas.core.frame.DataFrame`
        """
        first_col = self.id_map(foot, start_sensor_id)
        last_col = self.id_map(foot, end_sensor_id)
        return self.doc.loc[start_time:end_time, first_col:last_col]
def progress_bar(percent: float, bar_len: int = 20):
    """Draw the progress bar on the current terminal line, overwriting the previous one.

    Parameters
    ---
    percent
        progress as a fraction from 0 to 1.
    bar_len
        number of character cells between the brackets of the bar.
    """
    stream = sys.stdout
    filled = "=" * int(bar_len * percent)

    # carriage return moves back to the line start so the bar is redrawn in place
    stream.write("\r")
    stream.write("[{:<{}}] {:.1%}".format(filled, bar_len, percent))
    stream.flush()

    # terminate the line once finished, avoiding '%' appears when progress completed
    if percent == 1:
        print()
def _parse_stance(stance) -> Union[None, tuple]:
    """Return :code:`(start, end)` parsed from a stance cell, or :code:`None` for an empty/invalid cell."""
    # since the data of some empty stances are int (or NaN), transform it to str in advance
    text = str(stance).strip()
    if not re.search(r'[1-9][0-9\.]*-[1-9][0-9\.]*', text):
        return None
    # Split on the dash instead of matching each side with its own regex, so
    # single-character times such as '5-7' are parsed as well.
    start_text, _, end_text = text.partition('-')
    return float(start_text), float(end_text)


def add_trail(node: PedarNode, asc: str, folder: str, condition: str, trail: str, foot: str, stances: list):
    """Construct node tree starting from a subject node according to information of an entry in the guiding sheet.

    Parameters
    ---
    node
        the subject node.
    asc
        the :code:`asc` file name.
    folder
        the folder of the :code:`asc` file.
    condition
        condition name.
    trail
        trail name.
    foot
        foot name.
    stances
        a sequence of stance timestamp strings. Each item of :attr:`stances` is in the form of :code:`'<start_time>-<end_time>'`. Items not in that form are skipped.

    Note
    ---
    The same subject's information in the same condition and trail is separated in different entries. Each of them contains one foot type with specific stances time stamp. Therefore the node tree is not constructed in one go. This function is developed to handle the incremental construction process.

    The file is read from :code:`<folder>/<node.name>/<asc>.asc`.
    """
    # create the condition / trail levels only on their first appearance
    if condition not in node.branch_names():
        condition_node = PedarNode()
        condition_node.setup(name=condition)
        node.add_branch(condition_node)

    if trail not in node[condition].branch_names():
        trail_node = PedarNode()
        trail_node.setup(name=trail)
        node[condition].add_branch(trail_node)

    # read asc file object
    asc_object = Pedar_asc('{}/{}/{}.asc'.format(folder, node.name, asc))

    # then filled foot and stances data, which complete the dictionary structure to
    # node[condition][trail][foot][stance]
    foot_node = PedarNode()
    foot_node.setup(name=foot)
    node[condition][trail].add_branch(foot_node)

    # Iterate over the values rather than indexing with integers: the caller
    # may pass a label-indexed pandas Series, for which integer keys are not
    # reliable positional lookups.
    for position, stance in enumerate(stances, start=1):
        parsed = _parse_stance(stance)
        if parsed is None:
            # skip empty/invalid stance; its number is not reused
            continue
        start, end = parsed

        df = asc_object.get_time_sensor_slice(foot, start, end)
        stance_node = DataNode()
        stance_node.setup(df, start, end, name='stance ' + str(position))
        node[condition][trail][foot].add_branch(stance_node)
def trails_parse(path: Union[None, str], condition_list: list, max_read_rate: float = 1.0):
    """Load and parse pedar plantar pressure data and return the constructed node tree according to the guiding sheet.

    Parameters
    ---
    path
        the path of the guiding sheet. The :code:`.asc` files are looked up relative to the folder containing it.
    condition_list
        a list of condition names. They are matched literally.

        .. warning::
            It will be used for format checking for the entries in the guiding sheet. Parsing stops at the first entry whose name is not in the form of :code:`'S<subject> <condition> <trail>'`.
    max_read_rate
        :attr:`max_read_rate` is the percentage from 0 ~ 1. Only load :attr:`max_read_rate` of entries.

        .. tip::
            Data loading is very time consuming. When developing new features, it may speed up the verification and debug by setting a low :attr:`max_read_rate`. ::

                from pedarProbe import parse

                condition_list = ['fast walking', 'slow walking', 'normal walking']
                data = parse.trails_parse(
                    "data/subjects/walking plantar pressure time slot.xlsx",  # the guiding sheet's path
                    condition_list,  # condition list will be used for format checking
                    max_read_rate=0.1,
                )

    Return
    ---
    the root node of the constructed node tree, with one branch per subject.
    """
    # generate entry pattern for format checking; the named groups are reused
    # below so the parsed fields always agree with what was validated
    conditions = '|'.join(re.escape(str(name)) for name in condition_list)
    asc_pattern = re.compile(
        r'(?P<subject>S[1-9][0-9]*) (?P<condition>' + conditions + r') (?P<trail>[1-9][0-9]*)$'
    )

    # create root node
    root = PedarNode()
    root.setup('root')

    # load the summary file
    doc = pd.read_excel(path)

    # folder of the guiding sheet; a bare file name means the current folder
    head, separator, _ = path.rpartition('/')
    folder = head if separator else '.'

    length = len(doc.index)
    print("loading {} data entries".format(length))

    # parse each entry
    for count, index in enumerate(doc.index, start=1):
        # parse information of each trail
        asc = doc.loc[index, 'Unnamed: 0']
        # str() so that a blank cell is reported as invalid instead of raising
        asc_check = asc_pattern.match(str(asc))
        if not asc_check:
            print('invalid asc entry name: {}'.format(asc))
            break

        condition = asc_check.group('condition')
        trail = 'trail ' + asc_check.group('trail')
        foot = doc.loc[index, 'sideFoot']
        stances = doc.loc[index, 'stance phase 1':]

        # parse the subject's name
        # if the subject hasn't been added to root dictionary, add it
        subject_name = asc_check.group('subject')
        if subject_name not in root.branch_names():
            subject_node = PedarNode()
            subject_node.setup(subject_name)
            root.add_branch(subject_node)

        # add a trial to the subject
        add_trail(root[subject_name], asc, folder, condition, trail, foot, stances)

        # print progress bar and break if exceed max read rate
        # (count is the position of the entry, independent of the index labels)
        read_rate = count / length
        progress_bar(read_rate)
        if read_rate >= max_read_rate:
            break

    return root