Skip to content

Instantly share code, notes, and snippets.

@szeiger
Created July 6, 2020 13:55
Show Gist options
  • Save szeiger/fd7443ead01c05a82db6a36083acdc32 to your computer and use it in GitHub Desktop.
Save szeiger/fd7443ead01c05a82db6a36083acdc32 to your computer and use it in GitHub Desktop.
import os._
import $ivy.`com.nrinaudo::kantan.csv:0.6.1`
import kantan.csv._
import kantan.csv.ops._
/**
 * Pivots attribute rows of a CSV file into additional columns.
 *
 * The first row of `in` is treated as the header; rows with at most one field
 * (e.g. blank lines) are dropped. A row whose column 30 is non-empty is an
 * "attribute row": column 30 holds the attribute id, column 31 its display
 * name and column 32 its value. All remaining rows are grouped by column 0
 * (groups are emitted sorted by that key) and written out with columns 30-32
 * replaced by one column per distinct (id, name) pair, filled with the value
 * found in that group's attribute rows or "" when the group has none.
 * Groups that consist solely of attribute rows produce no output rows.
 *
 * Rows that survive the blank-line filter are assumed to have at least 31
 * columns (33 for attribute rows); shorter rows fail with an
 * IndexOutOfBoundsException.
 *
 * @param in  path of the CSV file to read
 * @param out path of the CSV file to write (header included)
 */
@main
def main(in: Path, out: Path): Unit = {
  // Positions of the grouping key and of the attribute triple in every input row.
  val keyCol = 0
  val attrIdCol = 30
  val attrNameCol = 31
  val attrValueCol = 32
  val firstColAfterAttrs = attrValueCol + 1

  // Surface the actual parse error instead of a bare NoSuchElementException.
  val all = in.toIO.asCsvReader[Vector[String]](rfc).map {
    case Right(row) => row
    case Left(err)  => sys.error(s"Failed to read CSV row from $in: $err")
  }.toList

  val (h, rows) = all match {
    case header :: rest => (header, rest)
    case Nil            => sys.error(s"Input file $in is empty: no header row found")
  }
  val data = rows.filter(_.length > 1)

  // Distinct (id, name) pairs in first-seen order. This must be a strict
  // collection: it is traversed once for the header and once per group, and a
  // bare Iterator would already be exhausted after building the header.
  // A fixed set can be substituted here, e.g. Vector("CI-D-0000200054" -> "Hauptspeicher").
  val attrs = data.iterator
    .filter(_(attrIdCol).nonEmpty)
    .map(v => (v(attrIdCol), v(attrNameCol)))
    .distinct
    .toVector

  val h2 =
    h.take(attrIdCol) ++
      attrs.map { case (attrId, name) => s"$attrId ($name)" } ++
      h.drop(firstColAfterAttrs)

  val data2 = data.groupBy(_(keyCol)).toSeq.sortBy(_._1).flatMap { case (_, entries) =>
    val (attrEntries, rest) = entries.partition(_(attrIdCol).nonEmpty)
    // If an id occurs more than once within a group, the last value wins.
    val attrMap = attrEntries.map(v => (v(attrIdCol), v(attrValueCol))).toMap
    // Strict Vector so the same values can be appended to every row of the group.
    val attrData = attrs.map { case (attrId, _) => attrMap.getOrElse(attrId, "") }
    rest.map(v => v.take(attrIdCol) ++ attrData ++ v.drop(firstColAfterAttrs))
  }

  val writer = out.toIO.asCsvWriter[Vector[String]](rfc.withHeader(h2: _*))
  // Close the writer even if a write fails so the output file handle is released.
  try data2.foreach(writer.write)
  finally writer.close()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment