Skip to content

Instantly share code, notes, and snippets.

@nflatrea
Created January 16, 2025 15:15
Show Gist options
  • Save nflatrea/c95a20bd1dee7baf09555525c18435ef to your computer and use it in GitHub Desktop.
Save nflatrea/c95a20bd1dee7baf09555525c18435ef to your computer and use it in GitHub Desktop.
scann: Python malware scanner based on hash values
#!/usr/bin/python
import argparse
import concurrent.futures
import hashlib
import html
import json
import os
import sys
from datetime import datetime
def compute_hash(file_path, hash_algorithm):
    """Return the hexadecimal digest of a file's contents.

    Args:
        file_path: Path of the file to hash; it is opened in binary mode.
        hash_algorithm: Name of a ``hashlib`` constructor attribute, such as
            ``'md5'`` or ``'sha256'``.

    Returns:
        The digest as a hexadecimal string.
    """
    digest = getattr(hashlib, hash_algorithm)()
    with open(file_path, 'rb') as stream:
        # Feed the file in 8 KiB blocks so it is never held in memory whole;
        # the empty-bytes sentinel marks end of file.
        for block in iter(lambda: stream.read(8192), b''):
            digest.update(block)
    return digest.hexdigest()
def scan_file(file_path, hash_dict, hash_algorithms):
    """Hash one file with each requested algorithm and look each digest up.

    Args:
        file_path: Path of the file to examine.
        hash_dict: Container of known hash strings, tested with ``in``.
        hash_algorithms: Iterable of algorithm names handed to compute_hash.

    Returns:
        A list with one record per algorithm. Each record holds the
        ISO-format timestamp of the check, the path, the match flag and the
        digest (algorithm name plus hex value).
    """
    def build_record(algorithm_name):
        digest = compute_hash(file_path, algorithm_name)
        is_known = digest in hash_dict
        return {
            'datetime': datetime.now().isoformat(),
            'path': file_path,
            'match': is_known,
            'hash': {
                'type': algorithm_name,
                'value': digest,
            },
        }

    return [build_record(algorithm_name) for algorithm_name in hash_algorithms]
def load_hash_dict(hash_file):
    """Load the set of known hashes from a text file, one hash per line.

    Surrounding whitespace is stripped and every entry is lower-cased:
    ``hexdigest()`` always produces lower-case hex, so an upper-case entry
    in the file would otherwise never match. Blank lines are ignored.

    Args:
        hash_file: Path of the text file containing the known hashes.

    Returns:
        A set of lower-case hash strings.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(hash_file, 'r') as f:
        # The walrus keeps the normalised entry and drops lines that are
        # empty after stripping.
        return {entry for line in f if (entry := line.strip().lower())}
def scan_directory(directory, hash_dict, hash_algorithms, output_file=None, output_format='txt'):
    """Scan every file beneath a directory and report the results.

    Files found by ``os.walk`` are hashed concurrently on a thread pool via
    ``scan_file``. Results are gathered in the order the files were
    discovered, so the report order is stable between runs. A file that
    cannot be read (permission denied, broken symlink, ...) is reported on
    stderr and skipped, so one bad file does not abort the whole scan.

    Args:
        directory: Root directory to walk.
        hash_dict: Container of known hash strings.
        hash_algorithms: Iterable of hash algorithm names to apply to each file.
        output_file: Path of the report file; when falsy the report is
            printed to stdout instead.
        output_format: ``'json'``, ``'html'`` or anything else for plain text.
    """
    all_results = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        pending = []
        for root, _, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)
                future = executor.submit(scan_file, file_path, hash_dict, hash_algorithms)
                pending.append((file_path, future))

        for file_path, future in pending:
            try:
                all_results.extend(future.result())
            except OSError as exc:
                # Best effort: note the failure and carry on with the rest.
                print(f"Warning: skipping {file_path}: {exc}", file=sys.stderr)

    if output_file:
        with open(output_file, 'w') as log_file:
            if output_format == 'json':
                json.dump(all_results, log_file, indent=4)
            elif output_format == 'html':
                log_file.write(format_html_output(all_results))
            else:
                for result in all_results:
                    log_file.write(format_txt_output(result) + '\n')
    else:
        if output_format == 'json':
            print(json.dumps(all_results, indent=4))
        elif output_format == 'html':
            print(format_html_output(all_results))
        else:
            for result in all_results:
                print(format_txt_output(result))
def format_txt_output(result):
    """Render one scan record as a single line of plain text.

    Args:
        result: A record with ``'datetime'``, ``'path'``, ``'match'`` and a
            nested ``'hash'`` mapping holding ``'type'`` and ``'value'``.

    Returns:
        The line ``[<datetime>] <path> : <status> with <type> <value>``.
    """
    if result['match']:
        match_status = 'match found'
    else:
        match_status = 'no match found'
    return "[{}] {} : {} with {} {}".format(
        result['datetime'],
        result['path'],
        match_status,
        result['hash']['type'],
        result['hash']['value'],
    )
def format_html_output(results):
    """Render scan records as a minimal HTML document with one list item each.

    Matching records are coloured red, the others black. Every interpolated
    value is HTML-escaped: file paths come from the scanned tree and may
    contain characters such as ``<``, ``&`` or quotes that would otherwise
    inject markup into the report.

    Args:
        results: Iterable of records with ``'datetime'``, ``'path'``,
            ``'match'`` and a nested ``'hash'`` mapping (``'type'``,
            ``'value'``).

    Returns:
        The complete HTML document as a string.
    """
    def esc(value):
        return html.escape(str(value))

    items = []
    for result in results:
        match_status = 'match found' if result['match'] else 'no match found'
        color = 'red' if result['match'] else 'black'
        items.append(
            f"<li style='color: {color};'>[{esc(result['datetime'])}] "
            f"{esc(result['path'])} : {match_status} with "
            f"{esc(result['hash']['type'])} {esc(result['hash']['value'])}</li>"
        )
    # Join once instead of growing a string with += inside the loop.
    return "<html><body><ul>" + "".join(items) + "</ul></body></html>"
def main():
    """Parse the command line, load the hash dictionary and run the scan.

    A directory input is delegated to ``scan_directory``. A single file is
    scanned with ``scan_file`` and reported in the requested format, either
    to the ``--output`` file or to stdout. SHA256 is used when neither
    ``-md5`` nor ``-sha256`` is given.

    Raises:
        SystemExit: With a message on stderr and exit status 1 when the hash
            dictionary or the input file cannot be read, or when the input
            path is neither a file nor a directory.
    """
    parser = argparse.ArgumentParser(description='scanm is a malware scanning tool based on hash dictionaries.')
    parser.add_argument('-H', '--hashes', required=True, help='Path to the hash dictionary file')
    parser.add_argument('-o', '--output', help='Path to the output log file')
    parser.add_argument('-f', '--format', choices=['json', 'txt', 'html'], default='txt', help='Output format (json, txt, or html)')
    parser.add_argument('-md5', action='store_true', help='Use MD5 hash algorithm')
    parser.add_argument('-sha256', action='store_true', help='Use SHA256 hash algorithm')
    parser.add_argument('input', help='Input file or directory to scan')
    args = parser.parse_args()

    hash_algorithms = []
    if args.md5:
        hash_algorithms.append('md5')
    if args.sha256:
        hash_algorithms.append('sha256')
    if not hash_algorithms:
        hash_algorithms.append('sha256')  # Default to SHA256 if no algorithm is specified

    try:
        hash_dict = load_hash_dict(args.hashes)
    except OSError as exc:
        # Exit with a readable message instead of a traceback.
        raise SystemExit(f"Error: cannot read hash dictionary {args.hashes}: {exc}") from None

    if os.path.isdir(args.input):
        scan_directory(args.input, hash_dict, hash_algorithms, args.output, args.format)
        return

    if not os.path.isfile(args.input):
        # SystemExit with a string prints it to stderr and exits with status 1,
        # so shell callers can detect the failure.
        raise SystemExit(f"Error: {args.input} is not a valid file or directory.")

    try:
        results = scan_file(args.input, hash_dict, hash_algorithms)
    except OSError as exc:
        raise SystemExit(f"Error: cannot read {args.input}: {exc}") from None

    if args.output:
        with open(args.output, 'w') as log_file:
            if args.format == 'json':
                json.dump(results, log_file, indent=4)
            elif args.format == 'html':
                log_file.write(format_html_output(results))
            else:
                for result in results:
                    log_file.write(format_txt_output(result) + '\n')
    else:
        if args.format == 'json':
            print(json.dumps(results, indent=4))
        elif args.format == 'html':
            print(format_html_output(results))
        else:
            for result in results:
                print(format_txt_output(result))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment