Last active
November 24, 2022 20:00
-
-
Save Lakhan-Nad/b7c9ec45880696eeb0926b8c72e6fb5d to your computer and use it in GitHub Desktop.
Bencode decoder in Kotlin
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.lang.Exception | |
import java.nio.charset.Charset | |
// Bencode parser in Kotlin | |
object BencodeParser { | |
class InvalidBencodeFormat( | |
startIndex: Int, | |
msg: String | |
): Exception("Invalid Bencode since index $startIndex: $msg") | |
abstract class TypeParser<T> { | |
abstract val name: String | |
abstract fun parseData( | |
b: ByteArray, | |
start: Int | |
): EndPointerAndValue<T?> | |
} | |
private object Constants { | |
const val ZeroASCII: Byte = 48 | |
const val NineASCII: Byte = 57 | |
const val SmallIASCII: Byte = 105 | |
const val ColonASCII: Byte = 58 | |
const val HyphenASCII: Byte = 45 | |
const val SmallEASCII: Byte = 101 | |
const val SmallLASCII: Byte = 108 | |
const val SmallDASCII: Byte = 100 | |
} | |
private fun readLength(b: ByteArray, start: Int): EndPointerAndValue<Int?> { | |
if (start >= b.size) { | |
// to indicate that no value was read we send null, | |
// instead of sending 0 | |
return EndPointerAndValue(start - 1, null) | |
} | |
var len = 0 | |
var st = start | |
while ( | |
st < b.size && | |
b[st] >= Constants.ZeroASCII && b[st] <= Constants.NineASCII | |
) { | |
len = len.times(10) + (b[st] - Constants.ZeroASCII) | |
st += 1 | |
} | |
return EndPointerAndValue(st - 1, len) | |
} | |
private fun ByteArray.getCheckLen(index: Int): Byte { | |
if (this.size <= index) { | |
throw InvalidBencodeFormat( | |
this.size, | |
"unexpected ending of byte string, supposed to had minimum length of ${index + 1} bytes." | |
) | |
} | |
return this[index] | |
} | |
private fun ByteArray.compareTo(other: ByteArray): Int { | |
var i = 0 | |
while (i < this.size && i < other.size) { | |
when (val diff = this[i] - other[i]) { | |
0 -> i += 1 | |
else -> return diff | |
} | |
} | |
return this.size - other.size | |
} | |
private fun endIndicatorCheck( | |
b: ByteArray, | |
contentEnd: Int, | |
parseType: String | |
) { | |
if (b.getCheckLen(contentEnd + 1) != Constants.SmallEASCII) { | |
throw InvalidBencodeFormat( | |
contentEnd + 1, | |
"no ending `e` found after reading $parseType content" | |
) | |
} | |
} | |
private fun startIndicatorChecker( | |
b: ByteArray, | |
start: Int, | |
startIndicator: Byte, | |
parseType: String | |
) { | |
if (b.getCheckLen(start) != startIndicator) { | |
throw InvalidBencodeFormat( | |
start, | |
"invalid start of $parseType found no $startIndicator present" | |
) | |
} | |
} | |
private fun checkNonNullError( | |
value: Any?, | |
st: Int, | |
expectedType: String | |
) { | |
if (value == null) { | |
throw InvalidBencodeFormat( | |
st, "value found null but expected $expectedType" | |
) | |
} | |
} | |
private fun getParser(b: Byte): TypeParser<out Any> { | |
return when { | |
b == Constants.SmallIASCII -> Parsers.NumberParser | |
b == Constants.SmallLASCII -> Parsers.ListParser | |
b == Constants.SmallDASCII -> Parsers.DictionaryParser | |
Constants.ZeroASCII <= b && b <= Constants.NineASCII -> Parsers.StringParser | |
// conforming to NULL object pattern | |
else -> Parsers.InvalidParser | |
} | |
} | |
private object Parsers { | |
/** | |
* format - len:content | |
* len -> length of content, | |
* content -> ascii encoded byte string | |
*/ | |
object StringParser : TypeParser<ByteArray>() { | |
override val name: String | |
get() = "String" | |
override fun parseData( | |
b: ByteArray, | |
start: Int | |
): EndPointerAndValue<ByteArray?> { | |
val (end, nullableLen) = readLength(b, start) | |
checkNonNullError(nullableLen, end, "positive integer") | |
val len = nullableLen!! | |
if (b.getCheckLen(end + 1) != Constants.ColonASCII) { | |
InvalidBencodeFormat( | |
end + 1, | |
"ill formed byte string in Bencode no : found between length and content" | |
) | |
} | |
if ((end + len + 1) >= b.size) { | |
InvalidBencodeFormat( | |
end + 1, | |
"should have length $len but only found ${b.size - end + 1} bytes." | |
) | |
} | |
return EndPointerAndValue( | |
end + len + 1, // len is size of str + 1 for colon | |
b.slice(IntRange(end + 2, end + 1 + len)).toByteArray() | |
) | |
} | |
} | |
/** | |
* format - i<integer encoded in base ten ASCII>e | |
* | |
* PS: we only support long for now :) | |
*/ | |
object NumberParser : TypeParser<Int>() { | |
override val name: String | |
get() = "Integer" | |
override fun parseData(b: ByteArray, start: Int): EndPointerAndValue<Int?> { | |
startIndicatorChecker(b, start, Constants.SmallIASCII, name) | |
val isNegative = b.getCheckLen(start+1) == Constants.HyphenASCII | |
var numStart = start + 1 | |
if (isNegative) { | |
numStart += 1 | |
} | |
val (end, num) = readLength(b, numStart) | |
checkNonNullError(num, numStart, "positive integer") | |
endIndicatorCheck(b, end, name) | |
return if (isNegative) { | |
EndPointerAndValue(end + 1, num!!.times(-1)) | |
} else { | |
EndPointerAndValue(end + 1, num!!) | |
} | |
} | |
} | |
/** | |
* format - l<content>e | |
* <content> -> <elem><elem>.... | |
* elem ->. str | integer | list | dict | |
*/ | |
object ListParser : TypeParser<List<Any>>() { | |
override val name: String | |
get() = "List" | |
override fun parseData(b: ByteArray, start: Int): EndPointerAndValue<List<Any>?> { | |
startIndicatorChecker(b, start, Constants.SmallLASCII, name) | |
val list = mutableListOf<Any>() | |
var st = start + 1 | |
while (b.getCheckLen(st) != Constants.SmallEASCII) { | |
val (end, value) = getParser(b[st]).parseData(b, st) | |
checkNonNullError(value, st, "any element") | |
st = end + 1 | |
list.add(value!!) | |
} | |
endIndicatorCheck(b, st - 1, name) | |
return EndPointerAndValue(st, list) | |
} | |
} | |
/** | |
* format - d<content>e | |
* <content> -> str<element>str<element>..... | |
* str -> byte string, | |
* elem ->. str | integer | list | dict | |
*/ | |
object DictionaryParser : TypeParser<Map<ByteArray, Any>>() { | |
override val name: String | |
get() = "Dictionary" | |
override fun parseData(b: ByteArray, start: Int): EndPointerAndValue<Map<ByteArray, Any>?> { | |
startIndicatorChecker(b, start, Constants.SmallDASCII, name) | |
val map = mutableMapOf<ByteArray, Any>() | |
var st = start + 1 | |
var lastKey: ByteArray? = null | |
while (b.getCheckLen(st) != Constants.SmallEASCII) { | |
val (endStr, key) = StringParser.parseData(b, st) | |
checkNonNullError(key, st, "byte string") | |
val (end, value) = getParser(b.getCheckLen(endStr + 1)).parseData(b, endStr + 1) | |
checkNonNullError(value, st, "any element") | |
st = end + 1 | |
lastKey?.let { | |
if (it.compareTo(key!!) >= 0) { | |
throw InvalidBencodeFormat( | |
endStr + 1, | |
"the keys in dict are not lexicographically ordered." | |
) | |
} | |
} | |
lastKey = key!! | |
map[key!!] = value!! | |
} | |
endIndicatorCheck(b, st - 1, name) | |
return EndPointerAndValue(st, map) | |
} | |
} | |
/** | |
* The Invalid Parser | |
*/ | |
object InvalidParser : TypeParser<Any>() { | |
override val name: String | |
get() = "Bencode" | |
override fun parseData(b: ByteArray, start: Int): EndPointerAndValue<Nothing> { | |
throw InvalidBencodeFormat( | |
start, | |
"no valid parseable element type" | |
) | |
} | |
} | |
} | |
/** | |
* main Bencode parser | |
*/ | |
fun parse(b: ByteArray): List<Any> { | |
val list = mutableListOf<Any>() | |
var s = 0 | |
println("Size: ${b.size}") | |
while (s < b.size) { | |
val (end, value) = getParser(b.getCheckLen(s)).parseData(b, s) | |
s = end + 1 | |
checkNonNullError(value, end, "any element") | |
list.add(value!!) | |
} | |
return list | |
} | |
} | |
typealias EndPointerAndValue<T> = Pair<Int, T> | |
fun main() { | |
// Because Bencode is an ASCII encoded format | |
readLine()?.let { inputStr -> | |
val value = BencodeParser.parse(inputStr.toByteArray(Charset.forName("ascii"))) | |
value.forEach { | |
printAny(it) | |
} | |
} | |
} | |
private fun printAny(it: Any) { | |
when (it) { | |
is Number -> println("Number: $it") | |
is ByteArray -> println("String: ${it.toString(Charset.forName("ascii"))}") | |
is Map<*, *> -> { | |
println("Map") | |
it.entries.forEach { mapEntry -> | |
println("${(mapEntry.key as ByteArray).toString(Charset.forName("ascii"))} : ") | |
printAny(mapEntry.value!!) | |
} | |
println("Map end") | |
} | |
is List<*> -> { | |
println("List") | |
it.forEach { entry -> | |
printAny(entry!!) | |
} | |
println("List End") | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
one missing piece here is that the keys in dictionary should be in lexicographical order.