Created
July 6, 2020 13:55
-
-
Save szeiger/fd7443ead01c05a82db6a36083acdc32 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os._ | |
import $ivy.`com.nrinaudo::kantan.csv:0.6.1` | |
import kantan.csv._ | |
import kantan.csv.ops._ | |
/** Pivots per-record attribute rows of a CSV file into one column per attribute.
  *
  * The first row of `in` is treated as the header. Every following row with more
  * than one cell is a data row, grouped by its first column (the record key).
  * Inside a group, rows whose column 30 is non-empty are "attribute rows": column 30
  * holds the attribute id, column 31 its display name and column 32 its value.
  * The remaining rows of the group are emitted with columns 30-32 replaced by one
  * cell per distinct attribute (in order of first appearance in the file), filled
  * with the group's value for that attribute or "" when the group has none.
  * Groups are written in ascending key order; a group consisting solely of
  * attribute rows produces no output row.
  *
  * @param in  CSV file to read (RFC format, header in the first row)
  * @param out CSV file to write; receives the widened header and the pivoted rows
  * @throws IllegalArgumentException if `in` has no header row or a row fails to parse
  */
@main
def main(in: Path, out: Path): Unit = {
  // 0-based column layout of the input file.
  val KeyCol = 0          // record key used for grouping
  val AttrIdCol = 30      // attribute id; empty on non-attribute rows
  val AttrNameCol = 31    // attribute display name
  val AttrValueCol = 32   // attribute value
  val AfterAttrCols = 33  // first column following the id/name/value triple

  // Total cell lookup: a row that is too short is treated as having empty cells
  // instead of failing with IndexOutOfBoundsException.
  def cell(row: Vector[String], idx: Int): String = row.lift(idx).getOrElse("")

  // Fail with the actual parse error rather than the NoSuchElementException
  // that `.right.get` would raise.
  val all = in.toIO.asCsvReader[Vector[String]](rfc).map {
    case Right(row) => row
    case Left(err)  => throw new IllegalArgumentException(s"Cannot parse $in: $err")
  }.toList

  val (header, rows) = all match {
    case first :: rest => (first, rest)
    case Nil           => throw new IllegalArgumentException(s"$in is empty: expected a header row")
  }

  // Rows with a single cell (e.g. blank lines) carry no data.
  val data = rows.filter(_.length > 1)

  // Example of the resulting pairs: ("CI-D-0000200054", "Hauptspeicher").
  // This must be a strict collection: it is traversed once for the header and
  // again for every group below. A single-use Iterator would be exhausted after
  // the header and leave every data row without its attribute cells.
  val attrs: List[(String, String)] = data
    .filter(row => cell(row, AttrIdCol).nonEmpty)
    .map(row => (cell(row, AttrIdCol), cell(row, AttrNameCol)))
    .distinct

  // Padding keeps the attribute columns aligned even when a row has fewer
  // than 30 leading cells.
  def leading(row: Vector[String]): Vector[String] =
    row.take(AttrIdCol).padTo(AttrIdCol, "")

  val h2 = leading(header) ++
    attrs.map { case (attrId, attrName) => s"$attrId ($attrName)" } ++
    header.drop(AfterAttrCols)

  val data2 = data.groupBy(_(KeyCol)).toSeq.sortBy(_._1).flatMap { case (_, entries) =>
    val (attrEntries, rest) = entries.partition(row => cell(row, AttrIdCol).nonEmpty)
    // If an attribute id occurs several times within a group, the last value wins.
    val valueById = attrEntries.map(row => (cell(row, AttrIdCol), cell(row, AttrValueCol))).toMap
    val attrData = attrs.map { case (attrId, _) => valueById.getOrElse(attrId, "") }
    rest.map(row => leading(row) ++ attrData ++ row.drop(AfterAttrCols))
  }

  val writer = out.toIO.asCsvWriter[Vector[String]](rfc.withHeader(h2: _*))
  // Close the writer even when a write fails so the file handle is not leaked.
  try data2.foreach(writer.write)
  finally writer.close()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment