Last active
September 28, 2017 14:40
-
-
Save impredicative/57521fd21f4c3a5c0805c05bf9d54086 to your computer and use it in GitHub Desktop.
pandas_util
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io | |
import re | |
import pandas as pd | |
def _prepare_pipe_separated_str(str_input: str) -> str:
    """Normalize a pipe-separated table so it parses as ``|``-delimited CSV.

    The following clean-up is applied to every line:

    * CRLF and bare CR line endings are converted to LF.
    * Leading and trailing spaces/tabs are removed.
    * Spaces/tabs surrounding each ``|`` delimiter are removed.
    * If every non-blank line both starts and ends with ``|``, that redundant
      outer pair of delimiters is removed as well.

    Args:
        str_input: Raw text of the table.

    Returns:
        The cleaned-up text, with one table row per line.
    """
    # The "$" anchor in MULTILINE mode only recognizes "\n". Without this
    # normalization a "\r" left at the end of a line would shield trailing
    # padding and the trailing pipe from the substitutions below.
    str_input = re.sub(r'\r\n?', '\n', str_input)

    # "[ \t]" is used rather than "\s" so that a match can never span lines.
    substitutions = [
        (r'^[ \t]+', ''),  # Remove leading padding
        (r'[ \t]+$', ''),  # Remove trailing padding
        (r'[ \t]*\|[ \t]*', '|'),  # Remove padding around delimiters
    ]

    # Blank lines are skipped by the CSV parser, so they must not veto the
    # detection of a table that is wrapped in outer pipes.
    rows = [line.strip() for line in str_input.split('\n') if line.strip()]
    if rows and all(row.startswith('|') and row.endswith('|') for row in rows):
        substitutions.extend([
            (r'^\|', ''),  # Remove redundant leading delimiter
            (r'\|$', ''),  # Remove redundant trailing delimiter
        ])

    for pattern, replacement in substitutions:
        str_input = re.sub(pattern, replacement, str_input, flags=re.MULTILINE)
    return str_input


def read_pipe_separated_str(str_input: str) -> pd.DataFrame:
    """Read a Pandas object from a pipe-separated table contained within a string.

    Example:

        | odcd_wacs | cs_wacs | automation_eligible |
        |           |         | True                |
        |           | 0       | False               |
        |           | 576     | True                |
        | 300       | 600     | True                |

    The first non-blank line is used as the header. The leading and trailing
    pipes are optional, but if one is present, so must be the other, and on
    every non-blank line. Padding (spaces or tabs) around cells, blank lines,
    and LF / CRLF / CR line endings are all tolerated.

    In PyCharm, the "Pipe Table Formatter" plugin has a "Format" feature that
    can be used to neatly format a table.

    Args:
        str_input: Text of the pipe-separated table.

    Returns:
        The table as parsed by ``pandas.read_csv`` with ``sep='|'``.
    """
    str_input = _prepare_pipe_separated_str(str_input)
    return pd.read_csv(io.StringIO(str_input), sep='|')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment