CJHwong · March 27, 2024 17:22 · Mar 27, 2024 · Mar 27, 2024 · Mar 27, 2024 · Mar 27, 2024
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ mysqldump --no-data $DB_NAME --result-file myschema.sql
 
 Download mysql2sqlite from https://github.com/dumblob/mysql2sqlite
 ```sh
-./mysql2sqlite ~/Downloads/myschema.sql | sqlite3 mysqlite3.db
+./mysql2sqlite myschema.sql | sqlite3 mysqlite3.db
 ```
 
 Download sqlite-schema-diagram.sql from https://gitlab.com/Screwtapello/sqlite-schema-diagram

diff --git a/README.md b/README.md
@@ -1,13 +1,21 @@
-# Origin: https://news.ycombinator.com/item?id=39798115
+Origin: https://news.ycombinator.com/item?id=39798115
 
-# Replace the database name by yours
+Replace the database name by yours
+```sh
 mysqldump --no-data $DB_NAME --result-file myschema.sql
+```
 
-# Download mysql2sqlite from https://github.com/dumblob/mysql2sqlite
+Download mysql2sqlite from https://github.com/dumblob/mysql2sqlite
+```sh
 ./mysql2sqlite ~/Downloads/myschema.sql | sqlite3 mysqlite3.db
+```
 
-# Download sqlite-schema-diagram.sql from https://gitlab.com/Screwtapello/sqlite-schema-diagram
+Download sqlite-schema-diagram.sql from https://gitlab.com/Screwtapello/sqlite-schema-diagram
+```sh
 sqlite3 ./mysqlite3.db -init sqlite-schema-diagram.sql "" > schema.dot
+```
 
-# Install Graphviz
+Install Graphviz
+```sh
 dot -Tsvg schema.dot > schema.svg
+```
diff --git a/User Steps → README.md b/User Steps → README.md
diff --git a/steps → User Steps b/steps → User Steps
diff --git a/mysql2sqlite b/mysql2sqlite
@@ -0,0 +1,289 @@
+#!/usr/bin/awk -f
+
+# Authors: @esperlu, @artemyk, @gkuenning, @dumblob
+
+# FIXME detect empty input file and issue a warning
+
+function printerr( s ){ print s | "cat >&2" }
+
+BEGIN {
+  if( ARGC != 2 ){
+    printerr( \
+      "USAGE:\n"\
+      "       mysql2sqlite dump_mysql.sql > dump_sqlite3.sql\n" \
+      "       OR\n" \
+      "       mysql2sqlite dump_mysql.sql | sqlite3 sqlite.db\n" \
+      "\n" \
+      "NOTES:\n" \
+      "       Dash in filename is not supported, because dash (-) means stdin." )
+    no_END = 1
+    exit 1
+  }
+
+  # Find INT_MAX supported by both this AWK (usually an ISO C signed int)
+  #   and SQlite.
+  # On non-8bit-based architectures, the additional bits are safely ignored.
+
+  # 8bit (lower precision should not exist)
+  s="127"
+  # "63" + 0 avoids potential parser misbehavior
+  if( (s + 0) "" == s ){ INT_MAX_HALF = "63" + 0 }
+  # 16bit
+  s="32767"
+  if( (s + 0) "" == s ){ INT_MAX_HALF = "16383" + 0 }
+  # 32bit
+  s="2147483647"
+  if( (s + 0) "" == s ){ INT_MAX_HALF = "1073741823" + 0 }
+  # 64bit (as INTEGER in SQlite3)
+  s="9223372036854775807"
+  if( (s + 0) "" == s ){ INT_MAX_HALF = "4611686018427387904" + 0 }
+#  # 128bit
+#  s="170141183460469231731687303715884105728"
+#  if( (s + 0) "" == s ){ INT_MAX_HALF = "85070591730234615865843651857942052864" + 0 }
+#  # 256bit
+#  s="57896044618658097711785492504343953926634992332820282019728792003956564819968"
+#  if( (s + 0) "" == s ){ INT_MAX_HALF = "28948022309329048855892746252171976963317496166410141009864396001978282409984" + 0 }
+#  # 512bit
+#  s="6703903964971298549787012499102923063739682910296196688861780721860882015036773488400937149083451713845015929093243025426876941405973284973216824503042048"
+#  if( (s + 0) "" == s ){ INT_MAX_HALF = "3351951982485649274893506249551461531869841455148098344430890360930441007518386744200468574541725856922507964546621512713438470702986642486608412251521024" + 0 }
+#  # 1024bit
+#  s="89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608"
+#  if( (s + 0) "" == s ){ INT_MAX_HALF = "44942328371557897693232629769725618340449424473557664318357520289433168951375240783177119330601884005280028469967848339414697442203604155623211857659868531094441973356216371319075554900311523529863270738021251442209537670585615720368478277635206809290837627671146574559986811484619929076208839082406056034304" + 0 }
+#  # higher precision probably not needed
+
+  FS=",$"
+  print "PRAGMA synchronous = OFF;"
+  print "PRAGMA journal_mode = MEMORY;"
+  print "BEGIN TRANSACTION;"
+}
+
+# historically 3 spaces separate non-argument local variables
+function bit_to_int( str_bit,   powtwo, i, res, bit, overflow ){
+  powtwo = 1
+  overflow = 0
+  # 011101 = 1*2^0 + 0*2^1 + 1*2^2 ...
+  for( i = length( str_bit ); i > 0; --i ){
+    bit = substr( str_bit, i, 1 )
+    if( overflow || ( bit == 1 && res > INT_MAX_HALF ) ){
+      printerr( \
+        NR ": WARN Bit field overflow, number truncated (LSBs saved, MSBs ignored)." )
+      break
+    }
+    res = res + bit * powtwo
+    # no warning here as it might be the last iteration
+    if( powtwo > INT_MAX_HALF ){ overflow = 1; continue }
+    powtwo = powtwo * 2
+  }
+  return res
+}
+
+# CREATE TRIGGER statements have funny commenting. Remember we are in trigger.
+/^\/\*.*(CREATE.*TRIGGER|create.*trigger)/ {
+  gsub( /^.*(TRIGGER|trigger)/, "CREATE TRIGGER" )
+  print
+  inTrigger = 1
+  next
+}
+# The end of CREATE TRIGGER has a stray comment terminator
+/(END|end) \*\/;;/ { gsub( /\*\//, "" ); print; inTrigger = 0; next }
+# The rest of triggers just get passed through
+inTrigger != 0 { print; next }
+
+# CREATE VIEW looks like a TABLE in comments
+/^\/\*.*(CREATE.*TABLE|create.*table)/ {
+  inView = 1
+  next
+}
+# end of CREATE VIEW
+/^(\).*(ENGINE|engine).*\*\/;)/ {
+  inView = 0
+  next
+}
+# content of CREATE VIEW
+inView != 0 { next }
+
+# skip comments
+/^\/\*/ { next }
+
+# skip PARTITION statements
+/^ *[(]?(PARTITION|partition) +[^ ]+/ { next }
+
+# print all INSERT lines
+( /^ *\(/ && /\) *[,;] *$/ ) || /^(INSERT|insert|REPLACE|replace)/ {
+  prev = ""
+
+  # first replace \\ by \_ that mysqldump never generates to deal with
+  # sequnces like \\n that should be translated into \n, not \<LF>.
+  # After we convert all escapes we replace \_ by backslashes.
+  gsub( /\\\\/, "\\_" )
+
+  # single quotes are escaped by another single quote
+  gsub( /\\'/, "''" )
+  gsub( /\\n/, "\n" )
+  gsub( /\\r/, "\r" )
+  gsub( /\\"/, "\"" )
+  gsub( /\\\032/, "\032" )  # substitute char
+
+  gsub( /\\_/, "\\" )
+
+  # sqlite3 is limited to 16 significant digits of precision
+  while( match( $0, /0x[0-9a-fA-F]{17}/ ) ){
+    hexIssue = 1
+    sub( /0x[0-9a-fA-F]+/, substr( $0, RSTART, RLENGTH-1 ), $0 )
+  }
+  if( hexIssue ){
+    printerr( \
+      NR ": WARN Hex number trimmed (length longer than 16 chars)." )
+    hexIssue = 0
+  }
+  print
+  next
+}
+
+# CREATE DATABASE is not supported
+/^(CREATE DATABASE|create database)/ { next }
+
+# print the CREATE line as is and capture the table name
+/^(CREATE|create)/ {
+  if( $0 ~ /IF NOT EXISTS|if not exists/ || $0 ~ /TEMPORARY|temporary/ ){
+    caseIssue = 1
+    printerr( \
+      NR ": WARN Potential case sensitivity issues with table/column naming\n" \
+      "          (see INFO at the end)." )
+  }
+  if( match( $0, /`[^`]+/ ) ){
+    tableName = substr( $0, RSTART+1, RLENGTH-1 )
+  }
+  aInc = 0
+  prev = ""
+  firstInTable = 1
+  print
+  next
+}
+
+# Replace `FULLTEXT KEY` (probably other `XXXXX KEY`)
+/^  (FULLTEXT KEY|fulltext key)/ { gsub( /[A-Za-z ]+(KEY|key)/, "  KEY" ) }
+
+# Get rid of field lengths in KEY lines
+/ (PRIMARY |primary )?(KEY|key)/ { gsub( /\([0-9]+\)/, "" ) }
+
+aInc == 1 && /PRIMARY KEY|primary key/ { next }
+
+# Replace COLLATE xxx_xxxx_xx statements with COLLATE BINARY
+/ (COLLATE|collate) [a-z0-9_]*/ { gsub( /(COLLATE|collate) [a-z0-9_]*/, "COLLATE BINARY" ) }
+
+# Print all fields definition lines except the `KEY` lines.
+/^  / && !/^(  (KEY|key)|\);)/ {
+  if( match( $0, /[^"`]AUTO_INCREMENT|auto_increment[^"`]/) ){
+    aInc = 1
+    gsub( /AUTO_INCREMENT|auto_increment/, "PRIMARY KEY AUTOINCREMENT" )
+  }
+  gsub( /(UNIQUE KEY|unique key) (`.*`|".*") /, "UNIQUE " )
+  gsub( /(CHARACTER SET|character set) [^ ]+[ ,]/, "" )
+  # FIXME
+  #   CREATE TRIGGER [UpdateLastTime]
+  #   AFTER UPDATE
+  #   ON Package
+  #   FOR EACH ROW
+  #   BEGIN
+  #   UPDATE Package SET LastUpdate = CURRENT_TIMESTAMP WHERE ActionId = old.ActionId;
+  #   END
+  gsub( /(ON|on) (UPDATE|update) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "" )
+  gsub( /(DEFAULT|default) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "DEFAULT current_timestamp")
+  gsub( /(COLLATE|collate) [^ ]+ /, "" )
+  gsub( /(ENUM|enum)[^)]+\)/, "text " )
+  gsub( /(SET|set)\([^)]+\)/, "text " )
+  gsub( /UNSIGNED|unsigned/, "" )
+  gsub( /_utf8mb3/, "" )
+  gsub( /` [^ ]*(INT|int|BIT|bit)[^ ]*/, "` integer" )
+  gsub( /" [^ ]*(INT|int|BIT|bit)[^ ]*/, "\" integer" )
+  ere_bit_field = "[bB]'[10]+'"
+  if( match($0, ere_bit_field) ){
+    sub( ere_bit_field, bit_to_int( substr( $0, RSTART +2, RLENGTH -2 -1 ) ) )
+  }
+
+  # remove USING BTREE and other suffixes for USING, for example: "UNIQUE KEY
+  # `hostname_domain` (`hostname`,`domain`) USING BTREE,"
+  gsub( / USING [^, ]+/, "" )
+
+  # field comments are not supported
+  gsub( / (COMMENT|comment).+$/, "" )
+  # Get commas off end of line
+  gsub( /,.?$/, "" )
+  if( prev ){
+    if( firstInTable ){
+      print prev
+      firstInTable = 0
+    }
+    else {
+      print "," prev
+    }
+  }
+  else {
+    # FIXME check if this is correct in all cases
+    if( match( $1,
+        /(CONSTRAINT|constraint) ["].*["] (FOREIGN KEY|foreign key)/ ) ){
+      print ","
+    }
+  }
+  prev = $1
+}
+
+/ ENGINE| engine/ {
+  if( prev ){
+    if( firstInTable ){
+      print prev
+      firstInTable = 0
+    }
+    else {
+      print "," prev
+    }
+  }
+  prev=""
+  print ");"
+  next
+}
+# `KEY` lines are extracted from the `CREATE` block and stored in array for later print
+# in a separate `CREATE KEY` command. The index name is prefixed by the table name to
+# avoid a sqlite error for duplicate index name.
+/^(  (KEY|key)|\);)/ {
+  if( prev ){
+    if( firstInTable ){
+      print prev
+      firstInTable = 0
+    }
+    else {
+      print "," prev
+    }
+  }
+  prev = ""
+  if( $0 == ");" ){
+    print
+  }
+  else {
+    if( match( $0, /`[^`]+/ ) ){
+      indexName = substr( $0, RSTART+1, RLENGTH-1 )
+    }
+    if( match( $0, /\([^()]+/ ) ){
+      indexKey = substr( $0, RSTART+1, RLENGTH-1 )
+    }
+    # idx_ prefix to avoid name clashes (they really happen!)
+    key[tableName] = key[tableName] "CREATE INDEX \"idx_" \
+       tableName "_" indexName "\" ON \"" tableName "\" (" indexKey ");\n"
+  }
+}
+
+END {
+  if( no_END ){ exit 1}
+  # print all KEY creation lines.
+  for( table in key ){ printf key[table] }
+
+  print "END TRANSACTION;"
+
+  if( caseIssue ){
+    printerr( \
+      "INFO Pure sqlite identifiers are case insensitive (even if quoted\n" \
+      "     or if ASCII) and doesnt cross-check TABLE and TEMPORARY TABLE\n" \
+      "     identifiers. Thus expect errors like \"table T has no column named F\".")
+  }
+}
diff --git a/sqlite-schema-diagram.sql b/sqlite-schema-diagram.sql
@@ -0,0 +1,172 @@
+-- We start a GraphViz graph
+SELECT '
+digraph structs {
+'
+UNION ALL
+
+-- Normally, GraphViz' "dot" command lays out a hierarchical graph from
+-- top to bottom.  However, we aren't just laying out individual nodes,
+-- each node is a vertical list of database fields.  To prevent GraphViz
+-- from snaking arrows all over the place, we constrain it to draw
+-- incoming references on the left of each field, and outgoing references
+-- on the right.  Since that's the way references flow for each database
+-- table, we tell GraphViz to lay the whole graph out left-to-right,
+-- which makes its job much easier and produces prettier output.
+SELECT '
+rankdir="LR"
+'
+UNION ALL
+
+
+-- By default, nodes have circles around them.  We will draw our own
+-- tables below, we do not want the circles.
+SELECT '
+node [shape=none]
+'
+UNION ALL
+
+-- This is the big query that renders a node complete with field names
+-- for each table in the database.  Because we want raw GraphViz output,
+-- our query returns rows with a single string field, whose value is a
+-- complex calculation using SQL as a templating engine.  This is kind
+-- of an abuse, but works nicely nevertheless.
+SELECT
+    CASE
+        -- When the previous row's table name is the same as this one,
+        -- do nothing.
+        WHEN LAG(t.name, 1) OVER (ORDER BY t.name) = t.name THEN ''
+
+        -- Otherwise, this is the first row of a new table, so start
+        -- the node markup and add a header row.  Normally in GraphViz,
+        -- the table name would *be* the label of the node, but since
+        -- we're using the label to represent the entire node, we have
+        -- to make our own header.
+        --
+        -- GraphViz does have a "record" label shape, but it seems tricky
+        -- to work with and I found the HTML-style label markup easier
+        -- to get working the way I wanted.
+        ELSE
+            t.name || ' [label=<
+            <TABLE BORDER="0" CELLSPACING="0" CELLBORDER="1">
+                <TR>
+                    <TD COLSPAN="2"><B>' || t.name || '</B></TD>
+                </TR>
+            '
+
+    -- After the header (if needed), we have rows for each field in
+    -- the table.
+    --
+    -- The "pk" metadata field is zero for table fields that are not part
+    -- of the primary key.  If the "pk" metadata field is 1 or more, that
+    -- tells you that table field's order in the (potentially composite)
+    -- primary key.
+    --
+    -- We also add ports to each of the table cells, so that we can
+    -- later tell GraphViz to specifically connect the ports representing
+    -- specific fields in each table, instead of connecting the tables
+    -- generally.
+    END || '
+                <TR>
+                    <TD PORT="' || i.name || '_to">' ||
+                        CASE i.pk WHEN 0 THEN '&nbsp;' ELSE '🔑' END ||
+                    '</TD>
+                    <TD PORT="' || i.name || '_from">' || i.name || '</TD>
+                </TR>
+            ' ||
+    CASE
+        -- When the next row's table name is the same as this one,
+        -- do nothing.
+        WHEN LEAD(t.name, 1) OVER (ORDER BY t.name) = t.name THEN ''
+
+        -- Otherwise, this is the last row of a database table, so end
+        -- the table markup.
+        ELSE '
+            </TABLE>
+        >];
+        '
+    END
+
+-- This is how you get nice relational data out of SQLite's metadata
+-- pragmas.
+FROM pragma_table_list() AS t
+    JOIN pragma_table_info(t.name, t.schema) AS i
+
+WHERE
+    -- SQLite has a bunch of metadata tables in each schema, which
+    -- are hidden from .tables and .schema but which are reported
+    -- in pragma_table_list().  They're not user-created and almost
+    -- certainly user databases don't have foreign keys to them, so
+    -- let's just filter them out.
+    t.name NOT LIKE 'sqlite_%'
+
+    -- Despite its name, pragma_table_list() also includes views.
+    -- Since those don't store any information or have any correctness
+    -- constraints, they're just distracting if you're trying to quickly
+    -- understand a database's schema, so we'll filter them out too.
+    AND t.type = 'table'
+UNION ALL
+
+-- Now we have all the database tables set up, we can draw the links
+-- between them.  SQLite gives us the pragma_foreign_key_list() function
+-- which (for a given source table) lists all the source fields that are
+-- part of a foreign key reference, the target table they refer to, and
+-- (if it was created with "REFERENCES table_name(column_name)" syntax,
+-- the target column names too.  Unfortunately, if the reference was
+-- created with "REFERENCES table_name" syntax, the pragma does *not*
+-- figure out what the corresponding target fields are, so we'll also need
+-- pragma_table_info() to look up the primary key(s) for the target table.
+--
+-- Once we have everything we need, we just do a bit more string
+-- concatenation to build up the GraphViz syntax equivalent.
+--
+-- Note that we use the ports we defined above, as well as the directional
+-- overrides :e and :w, to force GraphViz to give us a layout that's
+-- likely to be readable.
+SELECT
+
+    -- We left-join every foreign key field against pragma_table_info
+    -- looking for primary keys, and the target table may have a composite
+    -- primary key even if the foreign key does not reference the primary
+    -- key, so we may wind up with multiple results describing the same
+    -- foreign key reference.  DISTINCT makes sure we only describe each
+    -- reference once.
+    DISTINCT
+
+    t.name || ':' || f."from" || '_from:e -> ' ||
+
+    -- If the constraint was created with "REFERENCES
+    -- table_name(column_name)", then f.to will contain 'column_name'.
+    -- Otherwise, f.to is NULL, and we need to grab the corresponding
+    -- field from the primary key in i.name.
+    f."table" || ':' || COALESCE(f."to", i.name) || '_to:w'
+
+FROM pragma_table_list() AS t
+    JOIN pragma_foreign_key_list(t.name, t.schema) AS f
+
+    -- We look up all the fields in the target table, just in case
+    -- pragma_foreign_key_list() doesn't tell us what the target field
+    -- name is.  SQLite doesn't allow foreign-key references to cross
+    -- schemas, so it's OK to use the source table's schema name to look
+    -- up the target table.
+    --
+    -- Strictly speaking, we shouldn't need to LEFT JOIN here, a basic
+    -- JOIN should do.  This works around a bug in SQLite 3.16.0 to
+    -- version 3.45.1: https://sqlite.org/forum/forumpost/b1656fcb39
+    LEFT JOIN pragma_table_info(f."table", t.schema) AS i
+
+-- f.seq represents the order of fields in a source table's composite foreign key
+-- reference, starting at 0.  In "FOREIGN KEY (a, b)", "a" would have
+-- seq=0 and "b" would have seq=1.  i.pk represents the order of fields
+-- in a primary key, where "0" means "not part of the primary key".
+-- In "PRIMARY KEY (a, b)", "a" would have pk=1 and "b" would have pk=2.
+-- For a foreign key reference that specifies the target field name,
+-- none of this matters, but if the target field name is missing, then
+-- this makes sure that each field of the foreign key reference is joined
+-- with the corresponding primary key field of the target table.
+WHERE f.seq + 1 = i.pk
+
+UNION ALL
+
+-- Lastly, we close the GraphViz graph.
+SELECT '
+}';
diff --git a/gistfile1.txt → steps b/gistfile1.txt → steps
diff --git a/gistfile1.txt b/gistfile1.txt
@@ -0,0 +1,13 @@
+# Origin: https://news.ycombinator.com/item?id=39798115
+
+# Replace the database name by yours
+mysqldump --no-data $DB_NAME --result-file myschema.sql
+
+# Download mysql2sqlite from https://github.com/dumblob/mysql2sqlite
+./mysql2sqlite ~/Downloads/myschema.sql | sqlite3 mysqlite3.db
+
+# Download sqlite-schema-diagram.sql from https://gitlab.com/Screwtapello/sqlite-schema-diagram
+sqlite3 ./mysqlite3.db -init sqlite-schema-diagram.sql "" > schema.dot
+
+# Install Graphviz
+dot -Tsvg schema.dot > schema.svg