Skip to content

Instantly share code, notes, and snippets.

@Lakhan-Nad
Last active November 24, 2022 20:00
Show Gist options
  • Save Lakhan-Nad/b7c9ec45880696eeb0926b8c72e6fb5d to your computer and use it in GitHub Desktop.
Save Lakhan-Nad/b7c9ec45880696eeb0926b8c72e6fb5d to your computer and use it in GitHub Desktop.
Bencode decoder in Kotlin
import java.lang.Exception
import java.nio.charset.Charset
// Bencode parser in Kotlin
object BencodeParser {
class InvalidBencodeFormat(
startIndex: Int,
msg: String
): Exception("Invalid Bencode since index $startIndex: $msg")
abstract class TypeParser<T> {
abstract val name: String
abstract fun parseData(
b: ByteArray,
start: Int
): EndPointerAndValue<T?>
}
private object Constants {
const val ZeroASCII: Byte = 48
const val NineASCII: Byte = 57
const val SmallIASCII: Byte = 105
const val ColonASCII: Byte = 58
const val HyphenASCII: Byte = 45
const val SmallEASCII: Byte = 101
const val SmallLASCII: Byte = 108
const val SmallDASCII: Byte = 100
}
private fun readLength(b: ByteArray, start: Int): EndPointerAndValue<Int?> {
if (start >= b.size) {
// to indicate that no value was read we send null,
// instead of sending 0
return EndPointerAndValue(start - 1, null)
}
var len = 0
var st = start
while (
st < b.size &&
b[st] >= Constants.ZeroASCII && b[st] <= Constants.NineASCII
) {
len = len.times(10) + (b[st] - Constants.ZeroASCII)
st += 1
}
return EndPointerAndValue(st - 1, len)
}
private fun ByteArray.getCheckLen(index: Int): Byte {
if (this.size <= index) {
throw InvalidBencodeFormat(
this.size,
"unexpected ending of byte string, supposed to had minimum length of ${index + 1} bytes."
)
}
return this[index]
}
private fun ByteArray.compareTo(other: ByteArray): Int {
var i = 0
while (i < this.size && i < other.size) {
when (val diff = this[i] - other[i]) {
0 -> i += 1
else -> return diff
}
}
return this.size - other.size
}
private fun endIndicatorCheck(
b: ByteArray,
contentEnd: Int,
parseType: String
) {
if (b.getCheckLen(contentEnd + 1) != Constants.SmallEASCII) {
throw InvalidBencodeFormat(
contentEnd + 1,
"no ending `e` found after reading $parseType content"
)
}
}
private fun startIndicatorChecker(
b: ByteArray,
start: Int,
startIndicator: Byte,
parseType: String
) {
if (b.getCheckLen(start) != startIndicator) {
throw InvalidBencodeFormat(
start,
"invalid start of $parseType found no $startIndicator present"
)
}
}
private fun checkNonNullError(
value: Any?,
st: Int,
expectedType: String
) {
if (value == null) {
throw InvalidBencodeFormat(
st, "value found null but expected $expectedType"
)
}
}
private fun getParser(b: Byte): TypeParser<out Any> {
return when {
b == Constants.SmallIASCII -> Parsers.NumberParser
b == Constants.SmallLASCII -> Parsers.ListParser
b == Constants.SmallDASCII -> Parsers.DictionaryParser
Constants.ZeroASCII <= b && b <= Constants.NineASCII -> Parsers.StringParser
// conforming to NULL object pattern
else -> Parsers.InvalidParser
}
}
private object Parsers {
/**
* format - len:content
* len -> length of content,
* content -> ascii encoded byte string
*/
object StringParser : TypeParser<ByteArray>() {
override val name: String
get() = "String"
override fun parseData(
b: ByteArray,
start: Int
): EndPointerAndValue<ByteArray?> {
val (end, nullableLen) = readLength(b, start)
checkNonNullError(nullableLen, end, "positive integer")
val len = nullableLen!!
if (b.getCheckLen(end + 1) != Constants.ColonASCII) {
InvalidBencodeFormat(
end + 1,
"ill formed byte string in Bencode no : found between length and content"
)
}
if ((end + len + 1) >= b.size) {
InvalidBencodeFormat(
end + 1,
"should have length $len but only found ${b.size - end + 1} bytes."
)
}
return EndPointerAndValue(
end + len + 1, // len is size of str + 1 for colon
b.slice(IntRange(end + 2, end + 1 + len)).toByteArray()
)
}
}
/**
* format - i<integer encoded in base ten ASCII>e
*
* PS: we only support long for now :)
*/
object NumberParser : TypeParser<Int>() {
override val name: String
get() = "Integer"
override fun parseData(b: ByteArray, start: Int): EndPointerAndValue<Int?> {
startIndicatorChecker(b, start, Constants.SmallIASCII, name)
val isNegative = b.getCheckLen(start+1) == Constants.HyphenASCII
var numStart = start + 1
if (isNegative) {
numStart += 1
}
val (end, num) = readLength(b, numStart)
checkNonNullError(num, numStart, "positive integer")
endIndicatorCheck(b, end, name)
return if (isNegative) {
EndPointerAndValue(end + 1, num!!.times(-1))
} else {
EndPointerAndValue(end + 1, num!!)
}
}
}
/**
* format - l<content>e
* <content> -> <elem><elem>....
* elem ->. str | integer | list | dict
*/
object ListParser : TypeParser<List<Any>>() {
override val name: String
get() = "List"
override fun parseData(b: ByteArray, start: Int): EndPointerAndValue<List<Any>?> {
startIndicatorChecker(b, start, Constants.SmallLASCII, name)
val list = mutableListOf<Any>()
var st = start + 1
while (b.getCheckLen(st) != Constants.SmallEASCII) {
val (end, value) = getParser(b[st]).parseData(b, st)
checkNonNullError(value, st, "any element")
st = end + 1
list.add(value!!)
}
endIndicatorCheck(b, st - 1, name)
return EndPointerAndValue(st, list)
}
}
/**
* format - d<content>e
* <content> -> str<element>str<element>.....
* str -> byte string,
* elem ->. str | integer | list | dict
*/
object DictionaryParser : TypeParser<Map<ByteArray, Any>>() {
override val name: String
get() = "Dictionary"
override fun parseData(b: ByteArray, start: Int): EndPointerAndValue<Map<ByteArray, Any>?> {
startIndicatorChecker(b, start, Constants.SmallDASCII, name)
val map = mutableMapOf<ByteArray, Any>()
var st = start + 1
var lastKey: ByteArray? = null
while (b.getCheckLen(st) != Constants.SmallEASCII) {
val (endStr, key) = StringParser.parseData(b, st)
checkNonNullError(key, st, "byte string")
val (end, value) = getParser(b.getCheckLen(endStr + 1)).parseData(b, endStr + 1)
checkNonNullError(value, st, "any element")
st = end + 1
lastKey?.let {
if (it.compareTo(key!!) >= 0) {
throw InvalidBencodeFormat(
endStr + 1,
"the keys in dict are not lexicographically ordered."
)
}
}
lastKey = key!!
map[key!!] = value!!
}
endIndicatorCheck(b, st - 1, name)
return EndPointerAndValue(st, map)
}
}
/**
* The Invalid Parser
*/
object InvalidParser : TypeParser<Any>() {
override val name: String
get() = "Bencode"
override fun parseData(b: ByteArray, start: Int): EndPointerAndValue<Nothing> {
throw InvalidBencodeFormat(
start,
"no valid parseable element type"
)
}
}
}
/**
* main Bencode parser
*/
fun parse(b: ByteArray): List<Any> {
val list = mutableListOf<Any>()
var s = 0
println("Size: ${b.size}")
while (s < b.size) {
val (end, value) = getParser(b.getCheckLen(s)).parseData(b, s)
s = end + 1
checkNonNullError(value, end, "any element")
list.add(value!!)
}
return list
}
}
typealias EndPointerAndValue<T> = Pair<Int, T>
fun main() {
// Because Bencode is an ASCII encoded format
readLine()?.let { inputStr ->
val value = BencodeParser.parse(inputStr.toByteArray(Charset.forName("ascii")))
value.forEach {
printAny(it)
}
}
}
private fun printAny(it: Any) {
when (it) {
is Number -> println("Number: $it")
is ByteArray -> println("String: ${it.toString(Charset.forName("ascii"))}")
is Map<*, *> -> {
println("Map")
it.entries.forEach { mapEntry ->
println("${(mapEntry.key as ByteArray).toString(Charset.forName("ascii"))} : ")
printAny(mapEntry.value!!)
}
println("Map end")
}
is List<*> -> {
println("List")
it.forEach { entry ->
printAny(entry!!)
}
println("List End")
}
}
}
@Lakhan-Nad
Copy link
Author

Lakhan-Nad commented Nov 24, 2022

one missing piece here is that the keys in dictionary should be in lexicographical order.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment