# Documentation:
#   https://blog.csdn.net/duxin_csdn/article/details/88966295
# Library location:
#   https://github.com/kylebgorman/textgrid
# Parsing code


def is_chinese(uchar):
    """Return True if ``uchar`` lies in the range U+4E00..U+9FA5.

    Args:
        uchar: A single Unicode character. Longer strings are compared
            lexicographically against the two bounds, so pass one character
            at a time for a meaningful answer.

    Returns:
        bool: True when ``u'\\u4e00' <= uchar <= u'\\u9fa5'``, else False
        (an empty string yields False).
    """
    # Only the range U+4E00..U+9FA5 is checked; code points outside it
    # are reported as non-Chinese.
    return u'\u4e00' <= uchar <= u'\u9fa5'


def main():
    """Read ``./REC0001.textgrid`` and print every interval's mark.

    For each interval of each tier, prints the mark, the type of the mark,
    and the type of the mark after UTF-8 encoding.
    """
    # Imported here so that is_chinese() stays importable when the
    # third-party ``textgrid`` package is not installed.
    import textgrid

    tg = textgrid.TextGrid()
    tg.read('./REC0001.textgrid')

    for item in tg:
        # Debug aid: print(item.name, item.intervals, len(item.intervals))
        for i in item.intervals:
            print(i.mark, type(i.mark), type(i.mark.encode('utf-8')))


if __name__ == "__main__":
    main()


# Expected format of the imported file.
# NOTE(review): in the excerpt below the third tier is labelled "item [2]"
# and the CONTENT tier declares "size = 2" while listing four intervals;
# this looks like a hand-trimmed sample -- confirm before using it as a
# real fixture.
"""
File type = "ooTextFile"
Object class = "TextGrid"

xmin = 0
xmax = 144.43
tiers? <exists>
size = 3
item []:
    item [1]:
        class = "IntervalTier"
        name = "GLOBAL"
        xmin = 0
        xmax = 144.43
        intervals: size = 1
        intervals [1]:
            xmin = 0
            xmax = 144.43
            text = "[speaker]: 1 male, 2 male, 3 male; [language]: 1 普通话, 2 普通话, 3 普通话"
    item [2]:
        class = "IntervalTier"
        name = "SPEAKER"
        xmin = 0
        xmax = 144.43
        intervals: size = 2
        intervals [1]:
            xmin = 0
            xmax = 1.1184741690417594
            text = ""
        intervals [2]:
            xmin = 1.1184741690417594
            xmax = 1.7980652315593901
            text = "3"
    item [2]:
        class = "IntervalTier"
        name = "CONTENT"
        xmin = 0
        xmax = 144.43
        intervals: size = 2
        intervals [1]:
            xmin = 0
            xmax = 1.1184741690417594
            text = "[ENS]"
        intervals [2]:
            xmin = 1.1184741690417594
            xmax = 1.7980652315593901
            text = "[UNK]你好"
        intervals [3]:
            xmin = 1.7980652315593901
            xmax = 2.7925887376827525
            text = "[ENS]"
        intervals [4]:
            xmin = 2.7925887376827525
            xmax = 3.604782934350176
            text = "[*]"
"""