Skip to content

Instantly share code, notes, and snippets.

@samber
Last active September 28, 2024 08:55
Show Gist options
  • Save samber/5a7510cf1d8c39493559b0d80445efd7 to your computer and use it in GitHub Desktop.
Save samber/5a7510cf1d8c39493559b0d80445efd7 to your computer and use it in GitHub Desktop.
Password leak check (data source: 4iQ - Dec 2017 - 1.4 billion creds)
#
# This script builds a database of credentials, in order to check whether an email+password pair has leaked.
# For a few searches, the grep command is perfect. No need for an aircraft carrier to go fishing in a lake. ;)
# The data source contains 1.4 billion (yes, billion!) passwords from 4iQ (41GB large)
#
# PS: Use. Don't abuse !
#

mkdir 1.4B-password-dump/
cd 1.4B-password-dump/

# Download dump
aria2c 'magnet:?xt=urn:btih:7ffbcd8cee06aba2ce6561688cf68ce2addca0a3&dn=BreachCompilation&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Ftracker.leechers-paradise.org%3A6969&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fglotorrents.pw%3A6969&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337'

# start a postgres instance
docker run -d --name password-dump \
  -p 5432:5432 \
  -v ./BreachCompilation/data:/data \
  -e POSTGRES_USER=dump \
  -e POSTGRES_PASSWORD=dump \
  -e POSTGRES_DB=dump \
  postgres:10

# get a psql prompt
docker exec -it password-dump psql postgres://dump:dump@localhost:5432/dump

# Create a table
CREATE TABLE credentials (
  email VARCHAR NOT NULL,
  pass VARCHAR NOT NULL
);

# Create indexes on both columns for fast querying
CREATE INDEX credentials_email ON credentials (email);
CREATE INDEX credentials_pass ON credentials (pass);

# Run next commands into tmux. Dump import will take many hours ;)
tmux

# The \copy command from postgres is too strict for raw data
# \copy credentials(email, pass) FROM '/data/0/0' DELIMITER ':' CSV;

# Run a go script instead and pipe results into pg
tree -CfiF ./BreachCompilation/data \
    | grep -v '/$' \
    | xargs cat \
    | go run parser.go \
    | docker exec -i password-dump psql postgres://dump:dump@localhost:5432/dump

# Command to keep track of import on postgres side (unfortunately, it may slow down import)
\watch 'SELECT COUNT(*) FROM credentials;'
package main
import (
"bufio"
"fmt"
"io"
"os"
"strings"
)
// print_row converts one raw credential line into a SQL VALUES tuple of the
// form ('email', 'pass').
//
// The line is split on the first ':'; if it contains no ':' at all, the first
// ';' is used instead. Everything after that first delimiter is kept as the
// password, so passwords may themselves contain further delimiters.
//
// Single quotes in both fields are doubled so they cannot terminate the SQL
// string literal. NOTE(review): this assumes the server runs with
// standard_conforming_strings=on (backslashes are literal) — confirm.
//
// It returns "" when the line contains neither delimiter.
func print_row(line string) string {
	// ':' delimiter
	creds := strings.SplitN(line, ":", 2)
	// ';' delimiter
	if len(creds) != 2 {
		creds = strings.SplitN(line, ";", 2)
		if len(creds) != 2 {
			return ""
		}
	}
	// Escape every quote (n = -1). A fixed cap would leave later quotes
	// unescaped, and a single stray quote corrupts the whole INSERT batch.
	return fmt.Sprintf(
		"('%s', '%s')",
		strings.Replace(creds[0], "'", "''", -1),
		strings.Replace(creds[1], "'", "''", -1),
	)
}
// main reads raw credential lines from standard input and writes batched SQL
// INSERT statements for the credentials table to standard output. The running
// total of converted rows is reported on standard error after each batch.
//
// It exits with status 1 if reading the input or writing the SQL fails.
func main() {
	if err := writeInserts(os.Stdin, os.Stdout, os.Stderr); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// writeInserts streams lines from in, converts each one with print_row and
// writes multi-row INSERT statements of up to batchSize rows to out. After
// every statement the cumulative row count is written to progress.
//
// Lines that are empty or that print_row rejects are skipped. A statement is
// only emitted when it has at least one row, and the final partial batch is
// flushed at end of input. It returns nil on a clean EOF, otherwise the read
// or write error.
func writeInserts(in io.Reader, out, progress io.Writer) error {
	const batchSize = 1000

	reader := bufio.NewReader(in)
	w := bufio.NewWriter(out)
	fmt.Fprintln(w, "set client_encoding to 'UTF8';")

	total := 0
	rows := make([]string, 0, batchSize)

	// emit writes the pending rows as one INSERT statement. An empty batch is
	// skipped because "INSERT ... VALUES ;" is not valid SQL.
	emit := func() {
		if len(rows) == 0 {
			return
		}
		fmt.Fprintln(w, "INSERT INTO credentials (email, pass) VALUES")
		fmt.Fprintln(w, strings.Join(rows, ", "))
		fmt.Fprintln(w, ";")
		fmt.Fprintln(progress, total)
		rows = rows[:0]
	}

	for {
		// ReadString returns the data read so far together with the error, so
		// a last line without a trailing newline is still processed below.
		line, err := reader.ReadString('\n')
		line = strings.Replace(line, "\r", "", -1)
		line = strings.Replace(line, "\n", "", -1)
		if len(line) > 0 {
			if row := print_row(line); row != "" {
				rows = append(rows, row)
				total++
				if len(rows) == batchSize {
					emit()
				}
			}
		}
		if err != nil {
			// Flush what was collected before stopping, then surface any
			// sticky write error recorded by the buffered writer.
			emit()
			if werr := w.Flush(); werr != nil {
				return fmt.Errorf("writing SQL: %w", werr)
			}
			if err == io.EOF {
				return nil
			}
			return fmt.Errorf("reading standard input: %w", err)
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment