Created
January 18, 2018 08:08
-
-
Save Loveforkeeps/2f1b97b44611c962ad07af7300ac0664 to your computer and use it in GitHub Desktop.
文件按行去重
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding:utf-8 | |
import io | |
import sys | |
import os | |
def uniq_set(file):
    """Remove blank and duplicate lines from ``file``, rewriting it in place.

    The whole file is read, split with ``splitlines()``, and written back
    with one newline-terminated line per unique entry.  The first occurrence
    of every line keeps its relative position, so the result is
    deterministic (a bare ``set`` round-trip would shuffle the lines).

    The file is opened with ``io.open`` and no explicit encoding, i.e. the
    platform default text encoding is used for both reading and writing.

    Args:
        file: Path of the text file to deduplicate; it is overwritten.
    """
    seen = set()
    unique_lines = []
    with io.open(file, "r") as src:
        for line in src.read().splitlines():
            # Empty lines are dropped entirely rather than kept once.
            if line == "" or line in seen:
                continue
            seen.add(line)
            unique_lines.append(line)
    # Reading is finished and the handle closed before the same path is
    # truncated for writing.
    with io.open(file, "w") as dst:
        dst.writelines(line + u"\n" for line in unique_lines)
    print(u"%s :deduplicate succesful!" % file)
def uniq_set2(file):
    """Write the unique lines of ``file`` to ``file + "_dedup"``.

    The input is streamed line by line and the first occurrence of each line
    is copied to the output in its original order; the input file itself is
    not modified.  Blank lines are treated like any other line, so a single
    blank line is kept.

    Args:
        file: Path of the text file to read.
    """
    lines_seen = set()
    # The input is opened first so that a missing/unreadable input does not
    # leave an empty "_dedup" file behind; ``with`` closes both handles even
    # if reading or writing raises.
    with open(file, "r") as infile, open(file + "_dedup", "w") as outfile:
        for line in infile:
            # Compare without the trailing newline: the last line of a file
            # may lack one and must still match earlier identical lines.
            key = line[:-1] if line.endswith("\n") else line
            if key in lines_seen:
                continue
            lines_seen.add(key)
            outfile.write(line)
def main():
    """Command-line entry point: deduplicate the single file named in argv.

    Exactly one command-line argument (the file path) is expected; with any
    other argument count a usage hint is printed and nothing is processed.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print(u"Please asign a file!")
        return
    uniq_set(cli_args[0])
# Run the command-line entry point only when this file is executed as a
# script; importing it as a module performs no work.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment