Created
January 16, 2025 15:15
-
-
Save nflatrea/c95a20bd1dee7baf09555525c18435ef to your computer and use it in GitHub Desktop.
scann: a Python malware scanner based on hash values
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import argparse
import concurrent.futures
import hashlib
import html
import json
import os
import sys
from datetime import datetime
def compute_hash(file_path, hash_algorithm):
    """Return the hexadecimal digest of a file's contents.

    Args:
        file_path: Path of the file to hash; it is opened in binary mode.
        hash_algorithm: Name of a ``hashlib`` constructor, e.g. ``'md5'`` or
            ``'sha256'``.

    Returns:
        The digest as a hexadecimal string.
    """
    digest = getattr(hashlib, hash_algorithm)()
    with open(file_path, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns b'',
        # so the file is consumed in 8192-byte pieces rather than all at once.
        for block in iter(lambda: stream.read(8192), b''):
            digest.update(block)
    return digest.hexdigest()
def scan_file(file_path, hash_dict, hash_algorithms):
    """Hash one file with every requested algorithm and look up each digest.

    The file is read a single time and each chunk is fed to all hashers, so
    requesting several algorithms does not multiply the disk I/O and all
    digests are guaranteed to describe the same bytes.

    Args:
        file_path: Path of the file to scan.
        hash_dict: Container of known hash strings; only membership tests
            (``in``) are performed on it.
        hash_algorithms: Iterable of ``hashlib`` constructor names, e.g.
            ``['md5', 'sha256']``.

    Returns:
        A list with one dict per algorithm, in the order given. Each dict has
        the keys ``'datetime'`` (``datetime.now().isoformat()``), ``'path'``,
        ``'match'`` (bool) and ``'hash'`` (``{'type': ..., 'value': ...}``).
        An empty ``hash_algorithms`` yields an empty list without touching
        the file.

    Raises:
        OSError: If the file cannot be opened or read.
        AttributeError: If an algorithm name is not an attribute of
            ``hashlib``.
    """
    hashers = [(name, getattr(hashlib, name)()) for name in hash_algorithms]

    if hashers:
        with open(file_path, 'rb') as stream:
            while chunk := stream.read(8192):
                for _, hasher in hashers:
                    hasher.update(chunk)

    results = []
    for name, hasher in hashers:
        file_hash = hasher.hexdigest()
        results.append({
            'datetime': datetime.now().isoformat(),
            'path': file_path,
            'match': file_hash in hash_dict,
            'hash': {
                'type': name,
                'value': file_hash,
            },
        })
    return results
def load_hash_dict(hash_file):
    """Load the known-hash list from a text file, one hash per line.

    Each line is stripped of surrounding whitespace and lowercased, because
    ``hashlib`` hex digests are always lowercase and a hash list written in
    upper or mixed case would otherwise never match. Blank lines are skipped.

    Args:
        hash_file: Path to the hash dictionary file.

    Returns:
        A set of lowercase hash strings.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(hash_file, 'r') as f:
        return {entry.lower() for entry in map(str.strip, f) if entry}
def scan_directory(directory, hash_dict, hash_algorithms, output_file=None, output_format='txt'):
    """Scan every file under a directory tree and emit a report.

    Files found by ``os.walk`` are hashed concurrently on a thread pool via
    ``scan_file``. Results are gathered in submission (walk) order, so the
    report order does not depend on thread timing. A file that cannot be
    opened or read is reported on stderr and skipped instead of aborting the
    whole scan.

    Args:
        directory: Root directory to walk.
        hash_dict: Container of known hash strings, passed to ``scan_file``.
        hash_algorithms: Algorithm names, passed to ``scan_file``.
        output_file: Path of the report file; when falsy the report is
            printed to stdout.
        output_format: ``'json'``, ``'html'``, or anything else for plain
            text (one line per result).

    Returns:
        None.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        pending = []
        for root, _, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)
                future = executor.submit(scan_file, file_path, hash_dict, hash_algorithms)
                # Remember the path so a failure can name the offending file.
                pending.append((file_path, future))

        all_results = []
        for file_path, future in pending:
            try:
                all_results.extend(future.result())
            except OSError as exc:
                # Permission errors, dangling symlinks, files removed mid-scan:
                # warn and keep going rather than losing the whole report.
                print(f"Warning: skipping {file_path}: {exc}", file=sys.stderr)

    if output_file:
        with open(output_file, 'w') as log_file:
            if output_format == 'json':
                json.dump(all_results, log_file, indent=4)
            elif output_format == 'html':
                log_file.write(format_html_output(all_results))
            else:
                for result in all_results:
                    log_file.write(format_txt_output(result) + '\n')
    else:
        if output_format == 'json':
            print(json.dumps(all_results, indent=4))
        elif output_format == 'html':
            print(format_html_output(all_results))
        else:
            for result in all_results:
                print(format_txt_output(result))
def format_txt_output(result):
    """Render one scan result as a single plain-text log line.

    Args:
        result: Mapping with the keys ``'datetime'``, ``'path'``, ``'match'``
            and ``'hash'`` (itself a mapping with ``'type'`` and ``'value'``).

    Returns:
        A string of the form
        ``[<datetime>] <path> : <match status> with <type> <value>``.
    """
    if result['match']:
        verdict = 'match found'
    else:
        verdict = 'no match found'
    return "[{}] {} : {} with {} {}".format(
        result['datetime'],
        result['path'],
        verdict,
        result['hash']['type'],
        result['hash']['value'],
    )
def format_html_output(results):
    """Render scan results as a minimal HTML document with one list item each.

    Matching entries are coloured red, all others black. File paths come from
    the scanned tree and are therefore untrusted, so they are HTML-escaped
    before being embedded; otherwise a crafted file name could inject markup
    or script into the report.

    Args:
        results: Iterable of result mappings with the keys ``'datetime'``,
            ``'path'``, ``'match'`` and ``'hash'`` (a mapping with ``'type'``
            and ``'value'``).

    Returns:
        The complete HTML document as a single string.
    """
    items = []
    for result in results:
        match_status = 'match found' if result['match'] else 'no match found'
        color = 'red' if result['match'] else 'black'
        safe_path = html.escape(str(result['path']))
        items.append(
            f"<li style='color: {color};'>[{result['datetime']}] {safe_path} : "
            f"{match_status} with {result['hash']['type']} {result['hash']['value']}</li>"
        )
    # join() builds the document in one pass instead of repeated += copies.
    return "<html><body><ul>" + "".join(items) + "</ul></body></html>"
def main():
    """Command-line entry point: parse arguments, scan, and emit the report.

    The input path is validated before the hash list is loaded, so a mistyped
    path fails immediately. An invalid path terminates the program through
    ``SystemExit`` carrying the error message, which the interpreter prints
    to stderr and turns into exit status 1 (instead of printing to stdout
    and exiting with status 0).

    When neither ``-md5`` nor ``-sha256`` is given, SHA256 is used.

    Raises:
        SystemExit: If the input path is neither a file nor a directory, or
            if argument parsing fails.
    """
    parser = argparse.ArgumentParser(description='scanm is a malware scanning tool based on hash dictionaries.')
    parser.add_argument('-H', '--hashes', required=True, help='Path to the hash dictionary file')
    parser.add_argument('-o', '--output', help='Path to the output log file')
    parser.add_argument('-f', '--format', choices=['json', 'txt', 'html'], default='txt', help='Output format (json, txt, or html)')
    parser.add_argument('-md5', action='store_true', help='Use MD5 hash algorithm')
    parser.add_argument('-sha256', action='store_true', help='Use SHA256 hash algorithm')
    parser.add_argument('input', help='Input file or directory to scan')
    args = parser.parse_args()

    # Fail fast, before reading a potentially large hash list.
    if not (os.path.isdir(args.input) or os.path.isfile(args.input)):
        raise SystemExit(f"Error: {args.input} is not a valid file or directory.")

    hash_algorithms = []
    if args.md5:
        hash_algorithms.append('md5')
    if args.sha256:
        hash_algorithms.append('sha256')
    if not hash_algorithms:
        hash_algorithms.append('sha256')  # Default to SHA256 if no algorithm is specified

    hash_dict = load_hash_dict(args.hashes)

    if os.path.isdir(args.input):
        scan_directory(args.input, hash_dict, hash_algorithms, args.output, args.format)
        return

    results = scan_file(args.input, hash_dict, hash_algorithms)
    if args.output:
        with open(args.output, 'w') as log_file:
            if args.format == 'json':
                json.dump(results, log_file, indent=4)
            elif args.format == 'html':
                log_file.write(format_html_output(results))
            else:
                for result in results:
                    log_file.write(format_txt_output(result) + '\n')
    else:
        if args.format == 'json':
            print(json.dumps(results, indent=4))
        elif args.format == 'html':
            print(format_html_output(results))
        else:
            for result in results:
                print(format_txt_output(result))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment