Last active
July 14, 2023 11:55
-
-
Save ant8e/708d05d80f19886236584e02cb1d4798 to your computer and use it in GitHub Desktop.
Crockford32
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Copyright 2023 Antoine Comte
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package tech.ant8e.crockford32 | |
import scala.annotation.{switch, tailrec} | |
/** Crockford Base32 codec.
  *
  * Encoding maps every group of 5 input bytes (40 bits) onto 8 symbols of the alphabet
  * `0123456789ABCDEFGHJKMNPQRSTVWXYZ`; a trailing partial group is emitted without padding.
  * Decoding is case-insensitive, reads `o`/`O` as `0` and `i`/`I`/`l`/`L` as `1`, and skips
  * every character that is not part of the alphabet.
  */
object Crockford32 {

  private val Mask5Bits: Byte = 0x1f
  private val Mask4Bits: Byte = 0x0f
  private val Mask3Bits: Byte = 0x07
  private val Mask2Bits: Byte = 0x03
  private val Mask1Bits: Byte = 0x01
  private val ZeroByte: Byte  = 0.toByte

  /** Number of input bytes packed into one full group of symbols. */
  private val BytesPerGroup = 5

  /** Number of symbols produced by one full group of bytes. */
  private val SymbolsPerGroup = 8

  /** Symbol for each 5-bit value; the position in the string is the value. */
  private val EncodingAlphabet = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"

  /** Encodes `buffer` as an unpadded, upper-case Crockford Base32 string.
    *
    * @param buffer bytes to encode; an empty array yields the empty string
    * @return 2, 4, 5 or 7 symbols for a trailing group of 1, 2, 3 or 4 bytes, 8 per full group
    */
  def encode(buffer: Array[Byte]): String =
    encodeFrom(new StringBuilder, buffer, 0)

  /** Decodes a Crockford Base32 string.
    *
    * Characters that do not map to a symbol value are dropped before decoding.
    *
    * @param s text to decode
    * @return the decoded bytes; trailing symbols that do not complete a byte are ignored
    */
  def decode(s: String): Array[Byte] = {
    // cf32_dec yields -1 for anything outside the alphabet; keep only real symbol values.
    val values = s.iterator.map(c => cf32_dec(c)).filter(_ >= 0).toIndexedSeq
    decode_(Array.empty[Byte], values)
  }

  /** Appends the symbols for `buffer(offset)` onwards to `out`, one 5-byte group per step.
    *
    * Works on an offset and a shared builder so that the input is never copied and the cost
    * stays linear in the input size.
    */
  @tailrec
  private def encodeFrom(out: StringBuilder, buffer: Array[Byte], offset: Int): String = {
    val remaining = buffer.length - offset
    if (remaining <= 0) out.toString
    else {
      val size = math.min(remaining, BytesPerGroup)
      // A short final group is zero-filled; the symbols that only carry filler are cut below.
      def byteAt(i: Int): Byte = if (i < size) buffer(offset + i) else ZeroByte
      val symbols = encode_5_bytes(byteAt(0), byteAt(1), byteAt(2), byteAt(3), byteAt(4))
      // ceil(size * 8 / 5): 1 -> 2, 2 -> 4, 3 -> 5, 4 -> 7, 5 -> 8 symbols.
      out.append(symbols.take((size * 8 + 4) / 5))
      encodeFrom(out, buffer, offset + size)
    }
  }

  /** Decodes 5-bit symbol values and returns them appended to `acc`.
    *
    * @param acc    bytes that precede the decoded output; returned as is when `values` holds
    *               fewer than two symbols
    * @param values symbol values, consumed in groups of 8
    * @return `acc` followed by the decoded bytes; a trailing group of 2 or 3 symbols yields
    *         1 byte, 4 yield 2, 5 or 6 yield 3, 7 yield 4, and a lone symbol yields nothing
    */
  def decode_(acc: Array[Byte], values: IndexedSeq[Byte]): Array[Byte] = {
    val symbolCount = values.length
    if (symbolCount < 2) acc
    else {
      val fullGroups    = symbolCount / SymbolsPerGroup
      val tailSymbols   = symbolCount % SymbolsPerGroup
      val decodedLength = fullGroups * BytesPerGroup + tailSymbols * 5 / 8
      // The output size is known up front, so allocate once instead of growing per group.
      val out = java.util.Arrays.copyOf(acc, acc.length + decodedLength)
      decodeInto(out, acc.length, values, 0)
      out
    }
  }

  /** Writes the bytes decoded from `values(offset)` onwards into `out`, starting at `outPos`. */
  @tailrec
  private def decodeInto(
      out: Array[Byte],
      outPos: Int,
      values: IndexedSeq[Byte],
      offset: Int
  ): Unit = {
    val remaining = values.length - offset
    // A single leftover symbol carries only 5 bits and cannot form a byte.
    if (remaining >= 2) {
      val size = math.min(remaining, SymbolsPerGroup)
      def symbolAt(i: Int): Byte = if (i < size) values(offset + i) else ZeroByte
      val bytes = decode_5_bytes(
        symbolAt(0),
        symbolAt(1),
        symbolAt(2),
        symbolAt(3),
        symbolAt(4),
        symbolAt(5),
        symbolAt(6),
        symbolAt(7)
      )
      // floor(size * 5 / 8): only bytes fully covered by real symbols are kept.
      val produced = size * 5 / 8
      System.arraycopy(bytes, 0, out, outPos, produced)
      decodeInto(out, outPos + produced, values, offset + size)
    }
  }

  /** Encodes exactly five bytes (40 bits) as eight symbols.
    *
    * @return an 8-character string; symbol `n` holds bits `5n .. 5n+4` of the 40-bit group,
    *         counted from the most significant bit of `b0`
    */
  @inline
  def encode_5_bytes(
      b0: Byte,
      b1: Byte,
      b2: Byte,
      b3: Byte,
      b4: Byte
  ): String = {
    val c0 = ms5b(b0)
    val c1 = ls3b(b0) << 2 | ms2b(b1)
    val c2 = ls5b((b1 >> 1).toByte)
    val c3 = ls1b(b1) << 4 | ms4b(b2)
    val c4 = ls4b(b2) << 1 | ms1b(b3)
    val c5 = ls5b((b3 >> 2).toByte)
    val c6 = ls2b(b3) << 3 | ms3b(b4)
    val c7 = ls5b(b4)
    new String(
      Array[Char](
        cf32_enc(c0),
        cf32_enc(c1),
        cf32_enc(c2),
        cf32_enc(c3),
        cf32_enc(c4),
        cf32_enc(c5),
        cf32_enc(c6),
        cf32_enc(c7)
      )
    )
  }

  /** Rebuilds five bytes from eight symbol values; only the low 5 bits of each value are used.
    *
    * @return an array of exactly five bytes
    */
  @inline
  def decode_5_bytes(
      c0: Byte,
      c1: Byte,
      c2: Byte,
      c3: Byte,
      c4: Byte,
      c5: Byte,
      c6: Byte,
      c7: Byte
  ): Array[Byte] = {
    // `(c << 3).toByte` moves the 5 payload bits to the top of the byte so that the msNb
    // helpers can pick the leading N bits of the symbol.
    val b0 = ls5b(c0) << 3 | ms3b((c1 << 3).toByte)
    val b1 = ls2b(c1) << 6 | ls5b(c2) << 1 | ms1b((c3 << 3).toByte)
    val b2 = ls4b(c3) << 4 | ms4b((c4 << 3).toByte)
    val b3 = ls1b(c4) << 7 | ls5b(c5) << 2 | ms2b((c6 << 3).toByte)
    val b4 = ls3b(c6) << 5 | ls5b(c7)
    Array(b0.toByte, b1.toByte, b2.toByte, b3.toByte, b4.toByte)
  }

  /** Symbol for a 5-bit value. Every caller masks its argument to the range 0..31. */
  @inline
  private def cf32_enc(i: Int): Char = EncodingAlphabet.charAt(i)

  /** Value of a symbol, or -1 when `c` is not part of the alphabet. */
  @inline
  private def cf32_dec(c: Char): Byte = (c: @switch) match {
    case '0' | 'o' | 'O'             => 0
    case '1' | 'i' | 'I' | 'l' | 'L' => 1
    case '2'                         => 2
    case '3'                         => 3
    case '4'                         => 4
    case '5'                         => 5
    case '6'                         => 6
    case '7'                         => 7
    case '8'                         => 8
    case '9'                         => 9
    case 'a' | 'A'                   => 10
    case 'b' | 'B'                   => 11
    case 'c' | 'C'                   => 12
    case 'd' | 'D'                   => 13
    case 'e' | 'E'                   => 14
    case 'f' | 'F'                   => 15
    case 'g' | 'G'                   => 16
    case 'h' | 'H'                   => 17
    case 'j' | 'J'                   => 18
    case 'k' | 'K'                   => 19
    case 'm' | 'M'                   => 20
    case 'n' | 'N'                   => 21
    case 'p' | 'P'                   => 22
    case 'q' | 'Q'                   => 23
    case 'r' | 'R'                   => 24
    case 's' | 'S'                   => 25
    case 't' | 'T'                   => 26
    case 'v' | 'V'                   => 27
    case 'w' | 'W'                   => 28
    case 'x' | 'X'                   => 29
    case 'y' | 'Y'                   => 30
    case 'z' | 'Z'                   => 31
    case _                           => -1
  }

  // lsNb: the N least significant bits of a byte, as a non-negative Int.
  @inline private def ls1b(b: Byte): Int = b & Mask1Bits
  @inline private def ls2b(b: Byte): Int = b & Mask2Bits
  @inline private def ls3b(b: Byte): Int = b & Mask3Bits
  @inline private def ls4b(b: Byte): Int = b & Mask4Bits
  @inline private def ls5b(b: Byte): Int = b & Mask5Bits

  // msNb: the N most significant bits of a byte, shifted down to the low end. The mask also
  // removes the sign extension that `>>` introduces for negative bytes.
  @inline private def ms5b(b: Byte): Int = (b >> 3) & Mask5Bits
  @inline private def ms4b(b: Byte): Int = (b >> 4) & Mask4Bits
  @inline private def ms3b(b: Byte): Int = (b >> 5) & Mask3Bits
  @inline private def ms2b(b: Byte): Int = (b >> 6) & Mask2Bits
  @inline private def ms1b(b: Byte): Int = (b >> 7) & Mask1Bits
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Copyright 2023 Antoine Comte
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package tech.ant8e.crockford32 | |
import munit.FunSuite | |
/** Checks `Crockford32` against fixed vectors, symbol aliases and encode/decode round trips. */
class Crockford32Suite extends FunSuite {

  private val Pangram = "The quick brown fox jumps over the lazy dog."
  private val PangramEncoded =
    "AHM6A83HENMP6TS0C9S6YXVE41K6YY10D9TPTW3K41QQCSBJ41T6GS90DHGQMY90CHQPEBG"

  test("Crockford32 encoder should encode correctly") {
    assertEquals(Crockford32.encode(Array.empty[Byte]), "")
    assertEquals(cf32enc"A", "84")
    assertEquals(cf32enc"BZ", "89D0")
    assertEquals(cf32enc"CY0", "8DCK0")
    assertEquals(Crockford32.encode(Pangram.getBytes("UTF-8")), PangramEncoded)
  }

  test("Crockford32 decoder should decode correctly") {
    assertEquals(
      Crockford32.decode("").toSeq,
      Seq.empty[Byte]
    )
    assertEquals(cf32dec"84", bytes"A")
    assertEquals(cf32dec"89D0", bytes"BZ")
    assertEquals(cf32dec"8DCK0", bytes"CY0")
    // Same charset as the encoder helper, so the expectation never depends on the platform.
    assertEquals(
      Crockford32.decode(PangramEncoded).toSeq,
      Pangram.getBytes("UTF-8").toSeq
    )
  }

  test("Crockford32 decoder should accept lower case and look-alike symbols") {
    assertEquals(cf32dec"8dck0", bytes"CY0")
    assertEquals(cf32dec"oO", cf32dec"00")
    assertEquals(cf32dec"iIlL", cf32dec"1111")
  }

  test("Crockford32 should round-trip buffers of every trailing group size") {
    (0 to 20).foreach { length =>
      // Includes negative bytes so that sign extension in the bit shuffling is exercised.
      val input = Array.tabulate[Byte](length)(i => (i * 37 + length - 128).toByte)
      assertEquals(
        Crockford32.decode(Crockford32.encode(input)).toSeq,
        input.toSeq
      )
    }
  }

  /** String interpolators that keep the test vectors short. */
  implicit class Crockford32Ops(private val sc: StringContext) {

    /** Encodes the UTF-8 bytes of the interpolated text. */
    def cf32enc(args: Any*): String =
      Crockford32.encode(sc.s(args: _*).getBytes("UTF-8"))

    /** Decodes the interpolated text. */
    def cf32dec(args: Any*): Seq[Byte] =
      Crockford32.decode(sc.s(args: _*)).toSeq

    /** UTF-8 bytes of the interpolated text. */
    def bytes(args: Any*): Seq[Byte] = sc.s(args: _*).getBytes("UTF-8").toSeq
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment