Created
May 16, 2025 12:43
-
-
Save devig/1276c46637efe15764e4c01e9e7addf9 to your computer and use it in GitHub Desktop.
Оставляем только уникальные строки
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import sys
def extract_unique_lines(input_file):
    """Read a UTF-8 text file and collect its case-insensitively unique lines.

    Every line is stripped of surrounding whitespace (including the line
    terminator) before it is compared. Two lines are duplicates when their
    case-folded forms are equal; the first occurrence wins and is kept with
    its original letter case. A blank or whitespace-only line is treated
    like any other value, so at most one empty string ends up in the result.

    Args:
        input_file: Path of the text file to read.

    Returns:
        A ``(total_lines, unique_lines)`` tuple: the number of lines read
        from the file and the list of stripped, unique lines in first-seen
        order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    seen = set()
    unique_lines = []
    # Stays 0 when the file is empty and the loop body never runs.
    total_lines = 0
    with open(input_file, 'r', encoding='utf-8') as f:
        for total_lines, line in enumerate(f, start=1):
            # strip() already removes the trailing newline along with any
            # other surrounding whitespace.
            original = line.strip()
            # casefold() is the caseless-matching form of lower(): it also
            # equates pairs such as "ß" and "SS" that lower() keeps apart.
            normalized = original.casefold()
            if normalized not in seen:
                seen.add(normalized)
                unique_lines.append(original)
    return total_lines, unique_lines
def write_unique_lines(output_file, lines):
    """Write *lines* to *output_file* as UTF-8 text, one item per line.

    The file is created if missing and truncated if it already exists.
    A newline is appended to every item, including the last one.

    Args:
        output_file: Path of the file to write.
        lines: Iterable of strings, each without a trailing newline.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    with open(output_file, 'w', encoding='utf-8') as f:
        # One batched call instead of a write() per line.
        f.writelines(line + '\n' for line in lines)
def main():
    """Command-line entry point: de-duplicate the lines of one text file.

    Expects exactly one command-line argument, the path of the input file.
    The unique lines are written to ``uniq_<input file name>`` in the
    current working directory (the input's directory part is dropped), and
    the line counts before and after are printed to stdout.

    Exits with status 1 when the argument count is wrong or the input path
    is not an existing file; those diagnostics go to stderr so they do not
    mix with regular output.
    """
    if len(sys.argv) != 2:
        print("Использование: python uniq.py input.txt", file=sys.stderr)
        sys.exit(1)

    input_path = sys.argv[1]
    if not os.path.isfile(input_path):
        print(f"Файл не найден: {input_path}", file=sys.stderr)
        sys.exit(1)

    # Only the file name is used, so the result lands in the current
    # working directory regardless of where the input file lives.
    base_name = os.path.basename(input_path)
    output_path = f"uniq_{base_name}"

    total_lines, unique_lines = extract_unique_lines(input_path)
    write_unique_lines(output_path, unique_lines)

    print(f"Было строк: {total_lines}")
    print(f"Стало строк: {len(unique_lines)}")
    print(f"Уникальные строки записаны в файл: {output_path}")
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
python3 uniq.py minus-kondei.txt