#
# This walkthrough builds a database of leaked credentials, in order to check whether an email+password pair has leaked.
# For a few searches, the grep command is perfect. No need for an aircraft carrier to go fishing in a lake. ;)
# The data source contains 1.4 billion (yes, billion!) passwords from 4iQ (41GB large).
#
# The steps below are meant to be run one at a time: some lines are shell commands,
# others (CREATE TABLE, CREATE INDEX, \watch) are typed at the psql prompt opened further down.
#
# PS: Use. Don't abuse!
#
mkdir 1.4B-password-dump/
cd 1.4B-password-dump/
# Download the dump (torrent, via aria2c)
aria2c 'magnet:?xt=urn:btih:7ffbcd8cee06aba2ce6561688cf68ce2addca0a3&dn=BreachCompilation&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Ftracker.leechers-paradise.org%3A6969&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fglotorrents.pw%3A6969&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337'
# Start a postgres instance (user, password and database are all "dump";
# the downloaded data directory is mounted at /data inside the container)
docker run -d --name password-dump \
-p 5432:5432 \
-v ./BreachCompilation/data:/data \
-e POSTGRES_USER=dump \
-e POSTGRES_PASSWORD=dump \
-e POSTGRES_DB=dump \
postgres:10
# Get a psql prompt inside the container
docker exec -it password-dump psql postgres://dump:dump@localhost:5432/dump
# (psql) Create the table that will hold one row per leaked email/password pair
CREATE TABLE credentials (
email VARCHAR NOT NULL,
pass VARCHAR NOT NULL
);
# (psql) Create indexes for fast querying by email or by password
# NOTE(review): creating the indexes before the bulk import probably slows the import down;
# consider creating them after the load instead -- confirm.
CREATE INDEX credentials_email ON credentials (email);
CREATE INDEX credentials_pass ON credentials (pass);
# Run the next commands inside tmux. The dump import will take many hours ;)
tmux
# The \copy command from postgres is too strict for the raw data:
# \copy credentials(email, pass) FROM '/data/0/0' DELIMITER ':' CSV;
# Instead, list every data file, concatenate them, convert the lines to SQL INSERT
# statements with the Go program (parser.go) and pipe the result into psql.
tree -CfiF ./BreachCompilation/data \
| grep -v '/$' \
| xargs cat \
| go run parser.go \
| docker exec -i password-dump psql postgres://dump:dump@localhost:5432/dump
# (psql) Command to keep track of the import on the postgres side (unfortunately, it may slow down the import)
# NOTE(review): psql's \watch normally re-runs the current query buffer and takes an interval,
# e.g. "SELECT COUNT(*) FROM credentials; \watch 5" -- confirm that this form works as written.
\watch 'SELECT COUNT(*) FROM credentials;'
Last active
September 28, 2024 08:55
-
-
Save samber/5a7510cf1d8c39493559b0d80445efd7 to your computer and use it in GitHub Desktop.
Password leak check (data source: 4iQ - Dec 2017 - 1.4 billion creds)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"fmt" | |
"io" | |
"os" | |
"strings" | |
) | |
// print_row converts one raw dump line into a SQL VALUES tuple.
//
// The line is split once on the first ':'. If it contains no ':', it is split
// once on the first ';' instead. Everything before the delimiter is treated as
// the email and everything after it as the password, so the password may
// itself contain further delimiters.
//
// Every single quote in both fields is doubled so that the fields form valid
// SQL string literals. It returns a string of the form "('email', 'pass')",
// or "" when the line contains neither delimiter.
func print_row(line string) string {
	// ':' delimiter
	creds := strings.SplitN(line, ":", 2)

	// ';' delimiter
	if len(creds) != 2 {
		creds = strings.SplitN(line, ";", 2)
		if len(creds) != 2 {
			return ""
		}
	}

	// n = -1 replaces every quote. A positive limit would leave later quotes
	// unescaped and let the field terminate the SQL literal early.
	return fmt.Sprintf(
		"('%s', '%s')",
		strings.Replace(creds[0], "'", "''", -1),
		strings.Replace(creds[1], "'", "''", -1),
	)
}
func main() { | |
total := 0 | |
batch_size := 1000 | |
fmt.Println("set client_encoding to 'UTF8';") | |
reader := bufio.NewReader(os.Stdin) | |
for { | |
insert := []string{} | |
fmt.Println("INSERT INTO credentials (email, pass) VALUES") | |
for i := 0; i < batch_size; i++ { | |
line, err := reader.ReadString('\n') | |
if err != nil { | |
fmt.Fprintln(os.Stderr, "reading standard input:", err) | |
if err == io.EOF { | |
os.Exit(0) | |
} | |
} | |
line = strings.Replace(line, "\r", "", 42) | |
line = strings.Replace(line, "\n", "", 42) | |
if len(line) == 0 { | |
continue | |
} | |
row := print_row(line) | |
if row != "" { | |
insert = append(insert, row) | |
total++ | |
} | |
} | |
fmt.Println(strings.Join(insert, ", ")) | |
fmt.Println(";") | |
fmt.Fprintln(os.Stderr, total) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment