Skip to content

Instantly share code, notes, and snippets.

@CJHwong
Last active March 27, 2024 17:22

Revisions

  1. CJHwong revised this gist Mar 27, 2024. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion README.md
    Original file line number Diff line number Diff line change
    @@ -7,7 +7,7 @@ mysqldump --no-data $DB_NAME --result-file myschema.sql

    Download mysql2sqlite from https://github.com/dumblob/mysql2sqlite
    ```sh
    ./mysql2sqlite ~/Downloads/myschema.sql | sqlite3 mysqlite3.db
    ./mysql2sqlite myschema.sql | sqlite3 mysqlite3.db
    ```

    Download sqlite-schema-diagram.sql from https://gitlab.com/Screwtapello/sqlite-schema-diagram
  2. CJHwong revised this gist Mar 27, 2024. 1 changed file with 13 additions and 5 deletions.
    18 changes: 13 additions & 5 deletions README.md
    Original file line number Diff line number Diff line change
    @@ -1,13 +1,21 @@
    # Origin: https://news.ycombinator.com/item?id=39798115
    Origin: https://news.ycombinator.com/item?id=39798115

    # Replace the database name by yours
    Replace the database name by yours
    ```sh
    mysqldump --no-data $DB_NAME --result-file myschema.sql
    ```

    # Download mysql2sqlite from https://github.com/dumblob/mysql2sqlite
    Download mysql2sqlite from https://github.com/dumblob/mysql2sqlite
    ```sh
    ./mysql2sqlite ~/Downloads/myschema.sql | sqlite3 mysqlite3.db
    ```

    # Download sqlite-schema-diagram.sql from https://gitlab.com/Screwtapello/sqlite-schema-diagram
    Download sqlite-schema-diagram.sql from https://gitlab.com/Screwtapello/sqlite-schema-diagram
    ```sh
    sqlite3 ./mysqlite3.db -init sqlite-schema-diagram.sql "" > schema.dot
    ```

    # Install Graphviz
    Install Graphviz
    ```sh
    dot -Tsvg schema.dot > schema.svg
    ```
  3. CJHwong renamed this gist Mar 27, 2024. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  4. CJHwong renamed this gist Mar 27, 2024. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  5. CJHwong revised this gist Mar 27, 2024. 3 changed files with 461 additions and 0 deletions.
    289 changes: 289 additions & 0 deletions mysql2sqlite
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,289 @@
    #!/usr/bin/awk -f

    # Authors: @esperlu, @artemyk, @gkuenning, @dumblob

    # FIXME detect empty input file and issue a warning

    function printerr( s ){ print s | "cat >&2" }

    BEGIN {
    if( ARGC != 2 ){
    printerr( \
    "USAGE:\n"\
    " mysql2sqlite dump_mysql.sql > dump_sqlite3.sql\n" \
    " OR\n" \
    " mysql2sqlite dump_mysql.sql | sqlite3 sqlite.db\n" \
    "\n" \
    "NOTES:\n" \
    " Dash in filename is not supported, because dash (-) means stdin." )
    no_END = 1
    exit 1
    }

    # Find INT_MAX supported by both this AWK (usually an ISO C signed int)
    # and SQlite.
    # On non-8bit-based architectures, the additional bits are safely ignored.

    # 8bit (lower precision should not exist)
    s="127"
    # "63" + 0 avoids potential parser misbehavior
    if( (s + 0) "" == s ){ INT_MAX_HALF = "63" + 0 }
    # 16bit
    s="32767"
    if( (s + 0) "" == s ){ INT_MAX_HALF = "16383" + 0 }
    # 32bit
    s="2147483647"
    if( (s + 0) "" == s ){ INT_MAX_HALF = "1073741823" + 0 }
    # 64bit (as INTEGER in SQlite3)
    s="9223372036854775807"
    if( (s + 0) "" == s ){ INT_MAX_HALF = "4611686018427387904" + 0 }
    # # 128bit
    # s="170141183460469231731687303715884105728"
    # if( (s + 0) "" == s ){ INT_MAX_HALF = "85070591730234615865843651857942052864" + 0 }
    # # 256bit
    # s="57896044618658097711785492504343953926634992332820282019728792003956564819968"
    # if( (s + 0) "" == s ){ INT_MAX_HALF = "28948022309329048855892746252171976963317496166410141009864396001978282409984" + 0 }
    # # 512bit
    # s="6703903964971298549787012499102923063739682910296196688861780721860882015036773488400937149083451713845015929093243025426876941405973284973216824503042048"
    # if( (s + 0) "" == s ){ INT_MAX_HALF = "3351951982485649274893506249551461531869841455148098344430890360930441007518386744200468574541725856922507964546621512713438470702986642486608412251521024" + 0 }
    # # 1024bit
    # s="89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608"
    # if( (s + 0) "" == s ){ INT_MAX_HALF = "44942328371557897693232629769725618340449424473557664318357520289433168951375240783177119330601884005280028469967848339414697442203604155623211857659868531094441973356216371319075554900311523529863270738021251442209537670585615720368478277635206809290837627671146574559986811484619929076208839082406056034304" + 0 }
    # # higher precision probably not needed

    FS=",$"
    print "PRAGMA synchronous = OFF;"
    print "PRAGMA journal_mode = MEMORY;"
    print "BEGIN TRANSACTION;"
    }

    # historically 3 spaces separate non-argument local variables
    function bit_to_int( str_bit, powtwo, i, res, bit, overflow ){
    powtwo = 1
    overflow = 0
    # 011101 = 1*2^0 + 0*2^1 + 1*2^2 ...
    for( i = length( str_bit ); i > 0; --i ){
    bit = substr( str_bit, i, 1 )
    if( overflow || ( bit == 1 && res > INT_MAX_HALF ) ){
    printerr( \
    NR ": WARN Bit field overflow, number truncated (LSBs saved, MSBs ignored)." )
    break
    }
    res = res + bit * powtwo
    # no warning here as it might be the last iteration
    if( powtwo > INT_MAX_HALF ){ overflow = 1; continue }
    powtwo = powtwo * 2
    }
    return res
    }

    # CREATE TRIGGER statements have funny commenting. Remember we are in trigger.
    /^\/\*.*(CREATE.*TRIGGER|create.*trigger)/ {
    gsub( /^.*(TRIGGER|trigger)/, "CREATE TRIGGER" )
    print
    inTrigger = 1
    next
    }
    # The end of CREATE TRIGGER has a stray comment terminator
    /(END|end) \*\/;;/ { gsub( /\*\//, "" ); print; inTrigger = 0; next }
    # The rest of triggers just get passed through
    inTrigger != 0 { print; next }

    # CREATE VIEW looks like a TABLE in comments
    /^\/\*.*(CREATE.*TABLE|create.*table)/ {
    inView = 1
    next
    }
    # end of CREATE VIEW
    /^(\).*(ENGINE|engine).*\*\/;)/ {
    inView = 0
    next
    }
    # content of CREATE VIEW
    inView != 0 { next }

    # skip comments
    /^\/\*/ { next }

    # skip PARTITION statements
    /^ *[(]?(PARTITION|partition) +[^ ]+/ { next }

    # print all INSERT lines
    ( /^ *\(/ && /\) *[,;] *$/ ) || /^(INSERT|insert|REPLACE|replace)/ {
    prev = ""

    # first replace \\ by \_ that mysqldump never generates to deal with
    # sequnces like \\n that should be translated into \n, not \<LF>.
    # After we convert all escapes we replace \_ by backslashes.
    gsub( /\\\\/, "\\_" )

    # single quotes are escaped by another single quote
    gsub( /\\'/, "''" )
    gsub( /\\n/, "\n" )
    gsub( /\\r/, "\r" )
    gsub( /\\"/, "\"" )
    gsub( /\\\032/, "\032" ) # substitute char

    gsub( /\\_/, "\\" )

    # sqlite3 is limited to 16 significant digits of precision
    while( match( $0, /0x[0-9a-fA-F]{17}/ ) ){
    hexIssue = 1
    sub( /0x[0-9a-fA-F]+/, substr( $0, RSTART, RLENGTH-1 ), $0 )
    }
    if( hexIssue ){
    printerr( \
    NR ": WARN Hex number trimmed (length longer than 16 chars)." )
    hexIssue = 0
    }
    print
    next
    }

    # CREATE DATABASE is not supported
    /^(CREATE DATABASE|create database)/ { next }

    # print the CREATE line as is and capture the table name
    /^(CREATE|create)/ {
    if( $0 ~ /IF NOT EXISTS|if not exists/ || $0 ~ /TEMPORARY|temporary/ ){
    caseIssue = 1
    printerr( \
    NR ": WARN Potential case sensitivity issues with table/column naming\n" \
    " (see INFO at the end)." )
    }
    if( match( $0, /`[^`]+/ ) ){
    tableName = substr( $0, RSTART+1, RLENGTH-1 )
    }
    aInc = 0
    prev = ""
    firstInTable = 1
    print
    next
    }

    # Replace `FULLTEXT KEY` (probably other `XXXXX KEY`)
    /^ (FULLTEXT KEY|fulltext key)/ { gsub( /[A-Za-z ]+(KEY|key)/, " KEY" ) }

    # Get rid of field lengths in KEY lines
    / (PRIMARY |primary )?(KEY|key)/ { gsub( /\([0-9]+\)/, "" ) }

    aInc == 1 && /PRIMARY KEY|primary key/ { next }

    # Replace COLLATE xxx_xxxx_xx statements with COLLATE BINARY
    / (COLLATE|collate) [a-z0-9_]*/ { gsub( /(COLLATE|collate) [a-z0-9_]*/, "COLLATE BINARY" ) }

    # Print all fields definition lines except the `KEY` lines.
    /^ / && !/^( (KEY|key)|\);)/ {
    if( match( $0, /[^"`]AUTO_INCREMENT|auto_increment[^"`]/) ){
    aInc = 1
    gsub( /AUTO_INCREMENT|auto_increment/, "PRIMARY KEY AUTOINCREMENT" )
    }
    gsub( /(UNIQUE KEY|unique key) (`.*`|".*") /, "UNIQUE " )
    gsub( /(CHARACTER SET|character set) [^ ]+[ ,]/, "" )
    # FIXME
    # CREATE TRIGGER [UpdateLastTime]
    # AFTER UPDATE
    # ON Package
    # FOR EACH ROW
    # BEGIN
    # UPDATE Package SET LastUpdate = CURRENT_TIMESTAMP WHERE ActionId = old.ActionId;
    # END
    gsub( /(ON|on) (UPDATE|update) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "" )
    gsub( /(DEFAULT|default) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "DEFAULT current_timestamp")
    gsub( /(COLLATE|collate) [^ ]+ /, "" )
    gsub( /(ENUM|enum)[^)]+\)/, "text " )
    gsub( /(SET|set)\([^)]+\)/, "text " )
    gsub( /UNSIGNED|unsigned/, "" )
    gsub( /_utf8mb3/, "" )
    gsub( /` [^ ]*(INT|int|BIT|bit)[^ ]*/, "` integer" )
    gsub( /" [^ ]*(INT|int|BIT|bit)[^ ]*/, "\" integer" )
    ere_bit_field = "[bB]'[10]+'"
    if( match($0, ere_bit_field) ){
    sub( ere_bit_field, bit_to_int( substr( $0, RSTART +2, RLENGTH -2 -1 ) ) )
    }

    # remove USING BTREE and other suffixes for USING, for example: "UNIQUE KEY
    # `hostname_domain` (`hostname`,`domain`) USING BTREE,"
    gsub( / USING [^, ]+/, "" )

    # field comments are not supported
    gsub( / (COMMENT|comment).+$/, "" )
    # Get commas off end of line
    gsub( /,.?$/, "" )
    if( prev ){
    if( firstInTable ){
    print prev
    firstInTable = 0
    }
    else {
    print "," prev
    }
    }
    else {
    # FIXME check if this is correct in all cases
    if( match( $1,
    /(CONSTRAINT|constraint) ["].*["] (FOREIGN KEY|foreign key)/ ) ){
    print ","
    }
    }
    prev = $1
    }

    / ENGINE| engine/ {
    if( prev ){
    if( firstInTable ){
    print prev
    firstInTable = 0
    }
    else {
    print "," prev
    }
    }
    prev=""
    print ");"
    next
    }
    # `KEY` lines are extracted from the `CREATE` block and stored in array for later print
    # in a separate `CREATE KEY` command. The index name is prefixed by the table name to
    # avoid a sqlite error for duplicate index name.
    /^( (KEY|key)|\);)/ {
    if( prev ){
    if( firstInTable ){
    print prev
    firstInTable = 0
    }
    else {
    print "," prev
    }
    }
    prev = ""
    if( $0 == ");" ){
    print
    }
    else {
    if( match( $0, /`[^`]+/ ) ){
    indexName = substr( $0, RSTART+1, RLENGTH-1 )
    }
    if( match( $0, /\([^()]+/ ) ){
    indexKey = substr( $0, RSTART+1, RLENGTH-1 )
    }
    # idx_ prefix to avoid name clashes (they really happen!)
    key[tableName] = key[tableName] "CREATE INDEX \"idx_" \
    tableName "_" indexName "\" ON \"" tableName "\" (" indexKey ");\n"
    }
    }

    END {
    if( no_END ){ exit 1}
    # print all KEY creation lines.
    for( table in key ){ printf key[table] }

    print "END TRANSACTION;"

    if( caseIssue ){
    printerr( \
    "INFO Pure sqlite identifiers are case insensitive (even if quoted\n" \
    " or if ASCII) and doesnt cross-check TABLE and TEMPORARY TABLE\n" \
    " identifiers. Thus expect errors like \"table T has no column named F\".")
    }
    }
    172 changes: 172 additions & 0 deletions sqlite-schema-diagram.sql
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,172 @@
    -- We start a GraphViz graph
    SELECT '
    digraph structs {
    '
    UNION ALL

    -- Normally, GraphViz' "dot" command lays out a hierarchical graph from
    -- top to bottom. However, we aren't just laying out individual nodes,
    -- each node is a vertical list of database fields. To prevent GraphViz
    -- from snaking arrows all over the place, we constrain it to draw
    -- incoming references on the left of each field, and outgoing references
    -- on the right. Since that's the way references flow for each database
    -- table, we tell GraphViz to lay the whole graph out left-to-right,
    -- which makes its job much easier and produces prettier output.
    SELECT '
    rankdir="LR"
    '
    UNION ALL


    -- By default, nodes have circles around them. We will draw our own
    -- tables below, we do not want the circles.
    SELECT '
    node [shape=none]
    '
    UNION ALL

    -- This is the big query that renders a node complete with field names
    -- for each table in the database. Because we want raw GraphViz output,
    -- our query returns rows with a single string field, whose value is a
    -- complex calculation using SQL as a templating engine. This is kind
    -- of an abuse, but works nicely nevertheless.
    SELECT
    CASE
    -- When the previous row's table name is the same as this one,
    -- do nothing.
    WHEN LAG(t.name, 1) OVER (ORDER BY t.name) = t.name THEN ''

    -- Otherwise, this is the first row of a new table, so start
    -- the node markup and add a header row. Normally in GraphViz,
    -- the table name would *be* the label of the node, but since
    -- we're using the label to represent the entire node, we have
    -- to make our own header.
    --
    -- GraphViz does have a "record" label shape, but it seems tricky
    -- to work with and I found the HTML-style label markup easier
    -- to get working the way I wanted.
    ELSE
    t.name || ' [label=<
    <TABLE BORDER="0" CELLSPACING="0" CELLBORDER="1">
    <TR>
    <TD COLSPAN="2"><B>' || t.name || '</B></TD>
    </TR>
    '

    -- After the header (if needed), we have rows for each field in
    -- the table.
    --
    -- The "pk" metadata field is zero for table fields that are not part
    -- of the primary key. If the "pk" metadata field is 1 or more, that
    -- tells you that table field's order in the (potentially composite)
    -- primary key.
    --
    -- We also add ports to each of the table cells, so that we can
    -- later tell GraphViz to specifically connect the ports representing
    -- specific fields in each table, instead of connecting the tables
    -- generally.
    END || '
    <TR>
    <TD PORT="' || i.name || '_to">' ||
    CASE i.pk WHEN 0 THEN '&nbsp;' ELSE '🔑' END ||
    '</TD>
    <TD PORT="' || i.name || '_from">' || i.name || '</TD>
    </TR>
    ' ||
    CASE
    -- When the next row's table name is the same as this one,
    -- do nothing.
    WHEN LEAD(t.name, 1) OVER (ORDER BY t.name) = t.name THEN ''

    -- Otherwise, this is the last row of a database table, so end
    -- the table markup.
    ELSE '
    </TABLE>
    >];
    '
    END

    -- This is how you get nice relational data out of SQLite's metadata
    -- pragmas.
    FROM pragma_table_list() AS t
    JOIN pragma_table_info(t.name, t.schema) AS i

    WHERE
    -- SQLite has a bunch of metadata tables in each schema, which
    -- are hidden from .tables and .schema but which are reported
    -- in pragma_table_list(). They're not user-created and almost
    -- certainly user databases don't have foreign keys to them, so
    -- let's just filter them out.
    t.name NOT LIKE 'sqlite_%'

    -- Despite its name, pragma_table_list() also includes views.
    -- Since those don't store any information or have any correctness
    -- constraints, they're just distracting if you're trying to quickly
    -- understand a database's schema, so we'll filter them out too.
    AND t.type = 'table'
    UNION ALL

    -- Now we have all the database tables set up, we can draw the links
    -- between them. SQLite gives us the pragma_foreign_key_list() function
    -- which (for a given source table) lists all the source fields that are
    -- part of a foreign key reference, the target table they refer to, and
    -- (if it was created with "REFERENCES table_name(column_name)" syntax,
    -- the target column names too. Unfortunately, if the reference was
    -- created with "REFERENCES table_name" syntax, the pragma does *not*
    -- figure out what the corresponding target fields are, so we'll also need
    -- pragma_table_info() to look up the primary key(s) for the target table.
    --
    -- Once we have everything we need, we just do a bit more string
    -- concatenation to build up the GraphViz syntax equivalent.
    --
    -- Note that we use the ports we defined above, as well as the directional
    -- overrides :e and :w, to force GraphViz to give us a layout that's
    -- likely to be readable.
    SELECT

    -- We left-join every foreign key field against pragma_table_info
    -- looking for primary keys, and the target table may have a composite
    -- primary key even if the foreign key does not reference the primary
    -- key, so we may wind up with multiple results describing the same
    -- foreign key reference. DISTINCT makes sure we only describe each
    -- reference once.
    DISTINCT

    t.name || ':' || f."from" || '_from:e -> ' ||

    -- If the constraint was created with "REFERENCES
    -- table_name(column_name)", then f.to will contain 'column_name'.
    -- Otherwise, f.to is NULL, and we need to grab the corresponding
    -- field from the primary key in i.name.
    f."table" || ':' || COALESCE(f."to", i.name) || '_to:w'

    FROM pragma_table_list() AS t
    JOIN pragma_foreign_key_list(t.name, t.schema) AS f

    -- We look up all the fields in the target table, just in case
    -- pragma_foreign_key_list() doesn't tell us what the target field
    -- name is. SQLite doesn't allow foreign-key references to cross
    -- schemas, so it's OK to use the source table's schema name to look
    -- up the target table.
    --
    -- Strictly speaking, we shouldn't need to LEFT JOIN here, a basic
    -- JOIN should do. This works around a bug in SQLite 3.16.0 to
    -- version 3.45.1: https://sqlite.org/forum/forumpost/b1656fcb39
    LEFT JOIN pragma_table_info(f."table", t.schema) AS i

    -- f.seq represents the order of fields in a source table's composite foreign key
    -- reference, starting at 0. In "FOREIGN KEY (a, b)", "a" would have
    -- seq=0 and "b" would have seq=1. i.pk represents the order of fields
    -- in a primary key, where "0" means "not part of the primary key".
    -- In "PRIMARY KEY (a, b)", "a" would have pk=1 and "b" would have pk=2.
    -- For a foreign key reference that specifies the target field name,
    -- none of this matters, but if the target field name is missing, then
    -- this makes sure that each field of the foreign key reference is joined
    -- with the corresponding primary key field of the target table.
    WHERE f.seq + 1 = i.pk

    UNION ALL

    -- Lastly, we close the GraphViz graph.
    SELECT '
    }';
    File renamed without changes.
  6. CJHwong created this gist Mar 27, 2024.
    13 changes: 13 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,13 @@
    # Origin: https://news.ycombinator.com/item?id=39798115

    # Replace the database name by yours
    mysqldump --no-data $DB_NAME --result-file myschema.sql

    # Download mysql2sqlite from https://github.com/dumblob/mysql2sqlite
    ./mysql2sqlite ~/Downloads/myschema.sql | sqlite3 mysqlite3.db

    # Download sqlite-schema-diagram.sql from https://gitlab.com/Screwtapello/sqlite-schema-diagram
    sqlite3 ./mysqlite3.db -init sqlite-schema-diagram.sql "" > schema.dot

    # Install Graphviz
    dot -Tsvg schema.dot > schema.svg