Created
June 8, 2023 18:56
-
-
Save westonpace/361b202f10260f6be5729760eba18398 to your computer and use it in GitHub Desktop.
Measuring I/O usage of script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read a single column ("l_partkey") of the lineitem Parquet file so the
# file I/O issued for a column-projected read can be observed with strace.
# NOTE(review): this appears to be the one_column_parquet.py script that the
# second strace capture runs -- confirm against the gist's file names.
import pyarrow.parquet as pq

# The resulting table is intentionally discarded; only the reads matter here.
pq.read_table("/home/pace/dev/data/lineitem_10.parquet", columns=["l_partkey"])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(arrow-release-12) pace@pace-desktop:~/dev/experiments/parquet-partial-read$ strace --quiet=attach,exit -z -f -P /home/pace/dev/data/lineitem_10.parquet python whole_file_parquet.py | |
[pid 182506] stat("/home/pace/dev/data/lineitem_10.parquet", {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0 | |
[pid 182506] openat(AT_FDCWD, "/home/pace/dev/data/lineitem_10.parquet", O_RDONLY) = 7 | |
[pid 182506] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0 | |
[pid 182506] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0 | |
[pid 182524] pread64(7, "\1\377\241=\0211\22\341,\r%\16\241\10Q\243E\30A\212&\300\tI\310\16\341W!\210\16\206"..., 65536, 2006496747) = 65536 | |
[pid 182506] close(7) = 0 | |
[pid 182506] openat(AT_FDCWD, "/home/pace/dev/data/lineitem_10.parquet", O_RDONLY) = 7 | |
[pid 182506] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0 | |
[pid 182506] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0 | |
[pid 182506] fadvise64(7, 4, 31980683, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 31980792, 80584237, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 112565136, 42934633, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 155499868, 19818795, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 175318765, 105865224, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 281184118, 27899157, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 309083361, 30046178, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 339129641, 30045944, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 369175689, 30046294, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 399222088, 12578507, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 411800685, 257055150, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 668855964, 31996331, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 700852406, 80583996, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 781436512, 42934649, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 824371262, 19823762, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 844195126, 105864556, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 950059811, 27917327, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 977977224, 30046190, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1008023516, 30045950, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1038069570, 30046302, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1068115977, 12578489, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1080694556, 257027785, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1337722478, 32000712, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1369723301, 80582589, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1450306000, 42934648, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1493240749, 19789058, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1513029909, 105863090, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1618893128, 27895361, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1646788575, 30046191, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1676834868, 30045951, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1706880923, 30046303, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1736927331, 12578500, POSIX_FADV_WILLNEED) = 0 | |
[pid 182506] fadvise64(7, 1749505921, 257049221, POSIX_FADV_WILLNEED) = 0 | |
[pid 182530] pread64(7, "\25\4\25\270\250\200\1\25\244\251\200\1L\25\216\212 \25\0\22\0\0\234\224@\364\377\377\26\256\27\0"..., 80584237, 31980792) = 80584237 | |
[pid 182528] pread64(7, "\25\4\258\25<L\25\16\25\0\22\0\0\34l\1\0\0\0\2\0\0\0\3\0\0\0\4\0\0\0"..., 19818795, 155499868) = 19818795 | |
[pid 182527] pread64(7, "\25\4\25\232\1\25hL\25\26\25\0\22\0\0M\0\0\5\1\0\4\5\6\4\0\t\t\7\0\n\t"..., 27899157, 281184118) = 27899157 | |
[pid 182532] pread64(7, "\25\4\25\220\232\1\25\246\232\1L\25\304&\25\0\22\0\0\210M\364\304\1B%\0\0R%\0\0"..., 30045944, 339129641) = 30045944 | |
[pid 182524] pread64(7, "\25\4\25\360\216\200\1\25\304\217\200\1L\25\334\203 \25\0\22\0\0\270\207@\364\377\377\1\0\0\0"..., 31980683, 4) = 31980683 | |
[pid 182531] pread64(7, "\25\4\25\360\235\1\25\372\235\1L\25\274'\25\0\22\0\0\370N\364w'`%\0\0~%\0\0"..., 30046178, 309083361) = 30046178 | |
[pid 182528] pread64(7, "\25\4\25\200\1\25\206\1L\25\10\25\0\22\0\0@\360?\21\0\0\0DELIVER I"..., 12578507, 399222088) = 12578507 | |
[pid 182530] pread64(7, "\25\4\25\330\237\1\25\342\237\1L\25\366'\25\0\22\0\0\354O\364\353'i%\0\0\206%\0\0"..., 30046294, 369175689) = 30046294 | |
[pid 182529] pread64(7, "\25\4\25\200\3520\25\264\3520L\25\300\232\f\25\0\22\0\0\200\265\30\364\377\377n,\1\0\204\35"..., 42934633, 112565136) = 42934633 | |
[pid 182524] pread64(7, "\25\4\25\350\253\200\1\25\324\254\200\1L\25\372\212 \25\0\22\0\0\364\225@\364\377\377\376\207\23\0"..., 80583996, 700852406) = 80583996 | |
[pid 182528] pread64(7, "\25\4\258\25<L\25\16\25\0\22\0\0\34l\2\0\0\0\3\0\0\0\4\0\0\0\5\0\0\0"..., 19823762, 824371262) = 19823762 | |
[pid 182532] pread64(7, "\25\4\25\320\216\200\1\25\274\217\200\1L\25\324\203 \25\0\22\0\0\250\207@\364\377\377\347/1\1"..., 31996331, 668855964) = 31996331 | |
[pid 182529] pread64(7, "\25\4\25\232\1\25hL\25\26\25\0\22\0\0M\0\0\5\1\0\5\5\6\4\0\2\t\7\0\3\t"..., 27917327, 950059811) = 27917327 | |
[pid 182531] pread64(7, "\25\4\25\200\3520\25\260\3520L\25\300\232\f\25\0\22\0\0\200\265\30\364\377\377\223\23\0\0\351["..., 42934649, 781436512) = 42934649 | |
[pid 182524] pread64(7, "\25\4\25\360\235\1\25\372\235\1L\25\274'\25\0\22\0\0\370N\364w'\r$\0\0\340#\0\0"..., 30046190, 977977224) = 30046190 | |
[pid 182529] pread64(7, "\25\4\25\200\1\25\206\1L\25\10\25\0\22\0\0@\360?\21\0\0\0DELIVER I"..., 12578489, 1068115977) = 12578489 | |
[pid 182528] pread64(7, "\25\4\25\220\232\1\25\232\232\1L\25\304&\25\0\22\0\0\210M\364\207&\4$\0\0\30$\0\0"..., 30045950, 1008023516) = 30045950 | |
[pid 182532] pread64(7, "\25\4\25\320\237\1\25\332\237\1L\25\364'\25\0\22\0\0\350O\364\347'\24$\0\0\361#\0\0"..., 30046302, 1038069570) = 30046302 | |
[pid 182529] pread64(7, "\25\4\25\300\203\200\1\25\252\204\200\1L\25\360\200 \25\0\22\0\0\340\201@\364\377\377<G\33\0"..., 80582589, 1369723301) = 80582589 | |
[pid 182526] pread64(7, "\25\4\25\302\232\200\1\25\272\350`L\25\256\250\22\25\0\22\0\0\241\215@\30\0\0\0\0002yv"..., 105865224, 175318765) = 105865224 | |
[pid 182524] pread64(7, "\25\4\25\230\204\200\1\25\204\205\200\1L\25\206\201 \25\0\22\0\0\214\202@\364\377\377\203Cb\2"..., 32000712, 1337722478) = 32000712 | |
[pid 182532] pread64(7, "\25\4\258\25<L\25\16\25\0\22\0\0\34l\2\0\0\0\3\0\0\0\4\0\0\0\5\0\0\0"..., 19789058, 1493240749) = 19789058 | |
[pid 182528] pread64(7, "\25\4\25\200\3520\25\260\3520L\25\300\232\f\25\0\22\0\0\200\265\30\364\377\377(\365\0\0\210\20"..., 42934648, 1450306000) = 42934648 | |
[pid 182526] pread64(7, "\25\4\25\232\1\25hL\25\26\25\0\22\0\0M\0\0\5\1\0\2\5\6\4\0\4\t\7\r\1\0"..., 27895361, 1618893128) = 27895361 | |
[pid 182524] pread64(7, "\25\4\25\360\235\1\25\372\235\1L\25\274'\25\0\22\0\0\370N\364w'c'\0\0u'\0\0"..., 30046191, 1646788575) = 30046191 | |
[pid 182532] pread64(7, "\25\4\25\220\232\1\25\232\232\1L\25\304&\25\0\22\0\0\210M\364\207&\202'\0\0X'\0\0"..., 30045951, 1676834868) = 30045951 | |
[pid 182530] pread64(7, "\25\4\25\250\274\200\1\25\362\201aL\25\230\255\22\25\0\22\0\0\224\236@\30\0\0\0\0\25 g"..., 105864556, 844195126) = 105864556 | |
[pid 182526] pread64(7, "\25\4\25\200\1\25\206\1L\25\10\25\0\22\0\0@\360?\21\0\0\0DELIVER I"..., 12578500, 1736927331) = 12578500 | |
[pid 182528] pread64(7, "\25\4\25\320\237\1\25\332\237\1L\25\364'\25\0\22\0\0\350O\364\347'f'\0\0\203'\0\0"..., 30046303, 1706880923) = 30046303 | |
[pid 182529] pread64(7, "\25\4\25\346\201\200\1\25\312\324`L\25\352\244\22\25\0\22\0\0\363\200@\30\0\0\0\0e`R"..., 105863090, 1513029909) = 105863090 | |
[pid 182527] pread64(7, "\25\4\25\236\315\200\1\25\210\2116L\25\352\226\4\25\0\22\0\0\317\246@\360O\27\0\0\0eg"..., 257055150, 411800685) = 257055150 | |
[pid 182531] pread64(7, "\25\4\25\252\315\201\1\25\312\3166L\25\270\233\4\25\0\22\0\0\325\346@\300\27\0\0\0ull"..., 257027785, 1080694556) = 257027785 | |
[pid 182524] pread64(7, "\25\4\25\342\333\202\1\25\342\3756L\25\310\237\4\25\0\22\0\0\361\255A\300\f\0\0\0tly"..., 257049221, 1749505921) = 257049221 | |
[pid 182543] close(7) = 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(arrow-release-12) pace@pace-desktop:~/dev/experiments/parquet-partial-read$ strace --quiet=attach,exit -z -f -P /home/pace/dev/data/lineitem_10.parquet python one_column_parquet.py | |
[pid 182444] stat("/home/pace/dev/data/lineitem_10.parquet", {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0 | |
[pid 182444] openat(AT_FDCWD, "/home/pace/dev/data/lineitem_10.parquet", O_RDONLY) = 7 | |
[pid 182444] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0 | |
[pid 182444] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0 | |
[pid 182463] pread64(7, "\1\377\241=\0211\22\341,\r%\16\241\10Q\243E\30A\212&\300\tI\310\16\341W!\210\16\206"..., 65536, 2006496747) = 65536 | |
[pid 182444] close(7) = 0 | |
[pid 182444] openat(AT_FDCWD, "/home/pace/dev/data/lineitem_10.parquet", O_RDONLY) = 7 | |
[pid 182444] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0 | |
[pid 182444] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0 | |
[pid 182444] fadvise64(7, 31980792, 80584237, POSIX_FADV_WILLNEED) = 0 | |
[pid 182444] fadvise64(7, 700852406, 80583996, POSIX_FADV_WILLNEED) = 0 | |
[pid 182444] fadvise64(7, 1369723301, 80582589, POSIX_FADV_WILLNEED) = 0 | |
[pid 182463] pread64(7, "\25\4\25\270\250\200\1\25\244\251\200\1L\25\216\212 \25\0\22\0\0\234\224@\364\377\377\26\256\27\0"..., 80584237, 31980792) = 80584237 | |
[pid 182465] pread64(7, "\25\4\25\350\253\200\1\25\324\254\200\1L\25\372\212 \25\0\22\0\0\364\225@\364\377\377\376\207\23\0"..., 80583996, 700852406) = 80583996 | |
[pid 182466] pread64(7, "\25\4\25\300\203\200\1\25\252\204\200\1L\25\360\200 \25\0\22\0\0\340\201@\364\377\377<G\33\0"..., 80582589, 1369723301) = 80582589 | |
[pid 182470] close(7) = 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read every column of the lineitem Parquet file so the file I/O issued for
# a full-table read can be observed with strace.
# NOTE(review): this appears to be the whole_file_parquet.py script that the
# first strace capture runs -- confirm against the gist's file names.
import pyarrow.parquet as pq

# The resulting table is intentionally discarded; only the reads matter here.
pq.read_table("/home/pace/dev/data/lineitem_10.parquet")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment