Last active
October 3, 2021 07:11
-
-
Save tonsky/881d5d8c4fbed818fe2905a7591a91e0 to your computer and use it in GitHub Desktop.
CSV file sorting https://t.me/nikitonsky_pub/201
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
let fs = require('fs') | |
let parse = require('csv-parse/lib/sync') | |
/**
 * Sums the `Salary` column of a CSV file.
 *
 * The file is read synchronously and parsed in `columns` mode, so every row is an
 * object keyed by the header names.
 *
 * @param {string} path - Path to a CSV file whose header contains a `Salary` column.
 * @returns {number} Sum of all salaries (0 for a file with no data rows; `NaN` if any
 *   cell does not start with a number).
 */
function totalSalary(path) {
  const rows = parse(fs.readFileSync(path), { columns: true })
  // parseFloat takes a single argument; a radix only applies to parseInt.
  return rows.reduce((total, row) => total + parseFloat(row.Salary), 0)
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def total_salary(path: str) -> list[float]:
    """Return the ten largest values of the ``Salary`` column, highest first.

    The file at ``path`` is read as CSV with a header row; every ``Salary`` cell is
    converted with ``float``. Fewer than ten values are returned when the file has
    fewer than ten data rows.
    """
    with open(path) as fp:
        salaries = [float(record['Salary']) for record in csv.DictReader(fp)]
    salaries.sort(reverse=True)
    return salaries[:10]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def total_salary(path: str) -> list[float]:
    """Return the ten largest ``Salary`` values of a CSV file, highest first.

    Rows are streamed through ``heapq.nlargest``, so only the current top ten are
    kept in memory instead of a fully sorted list.
    """
    with open(path) as fp:
        reader = csv.DictReader(fp)
        salaries = map(lambda record: float(record['Salary']), reader)
        return heapq.nlargest(10, salaries)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.nio.file.* | |
import kotlin.io.* | |
/**
 * Prints the ten highest values of the "Salary" column of the CSV file at [path],
 * one per line, in ascending order.
 *
 * Cells that are missing or cannot be parsed as a float are skipped. Nothing is
 * printed when the file is empty or its header has no "Salary" column.
 */
fun top10Salaries(path: Path) =
    path.toFile().readLines().let { rows ->
        val salaryColumn = rows.firstOrNull()?.split(',')?.indexOf("Salary")
        salaryColumn?.let { column ->
            val salaries = rows.drop(1).mapNotNull { row ->
                row.split(',').getOrNull(column)?.toFloatOrNull()
            }
            // Ascending sort, so the ten largest values are the last ten.
            salaries.sorted()
                .takeLast(10)
                .forEach { println(it) }
        }
    }
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System.IO | |
let splitBy (c: char) (s: string) = s.Split(c) | |
/// Prints the `number` highest values of the "Salary" column of the CSV file at `path`,
/// highest first, as "Top <rank>: <salary> $$$".
/// Raises if the file is empty, the header has no "Salary" column, or a cell is not a number.
let showTopSalaries number path =
    // Cache the lazily read lines so the header lookup and the data pass share one read.
    let rows = File.ReadLines path |> Seq.cache
    let index =
        rows
        |> Seq.head
        |> splitBy ','
        |> Seq.findIndex (fun s -> s = "Salary")
    rows
    |> Seq.tail
    |> Seq.map (splitBy ',' >> fun cells -> cells.[index] |> float)
    |> Seq.sortDescending
    // Seq.truncate, unlike Seq.take, does not throw when fewer than `number` rows exist.
    |> Seq.truncate number
    |> Seq.indexed
    |> Seq.iter (fun (rank, salary) -> printfn "Top %d: %.2f $$$" (rank + 1) salary)
(* | |
CSV file should have following headers | |
.__________+_____+________. | |
| FullName | Age | Salary | | |
!__________!_____!________! | |
*) | |
[<EntryPoint>] | |
let main argv = | |
argv.[0] | |
|> showTopSalaries 10 | |
0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the CSV (first line is the header) and show the ten highest salaries.
# Plain ASCII quotes are required: typographic quotes are a syntax error in R.
x = read.csv("~/[path]", header = TRUE)
head(sort(x$Salary, decreasing = TRUE), n = 10)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
prostaya_programma = do | |
zagolovki : dannye <- map (splitOn ",") . lines <$> readFile "input.csv" | |
let Just nomer_interesnoi_kolonki = elemIndex "Salary" zagolovki | |
dannye | |
& map (!! nomer_interesnoi_kolonki) | |
& map (read @Double) | |
& sortOn negate | |
& take 10 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// The file is read twice: once for the header, once for the data rows.
final Path csv = Path.of("path/to/file");

// Position of the "Salary" column, taken from the header line.
final int idx;
try (var header = Files.lines(csv)) {
    idx = header.findFirst()
            .map(line -> List.of(line.split(",")).indexOf("Salary"))
            .orElseThrow(() -> new IllegalStateException("CSV file is empty"));
}
if (idx < 0) {
    throw new IllegalStateException("Salary column not found");
}

// Files.lines keeps the file open until the stream is closed, hence try-with-resources.
try (var rows = Files.lines(csv)) {
    rows.skip(1) // the header line is not a number and must not reach parseDouble
        .map(line -> line.split(",")[idx])
        .map(Double::parseDouble)
        .sorted(Comparator.reverseOrder())
        .limit(10)
        .forEach(System.out::println);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ( | |
"encoding/csv" | |
"fmt" | |
"os" | |
"sort" | |
"strconv" | |
) | |
func topSalariesCSV(path string) ([]int, error) { | |
f, err := os.Open(path) | |
if err != nil { | |
return nil, err | |
} | |
defer func() { | |
if err := f.Close(); err != nil { | |
panic(err) | |
} | |
}() | |
reader := csv.NewReader(f) | |
lines, err := reader.ReadAll() | |
if err != nil { | |
return nil, fmt.Errorf("parse csv | %v", err) | |
} | |
col := sort.SearchStrings(lines[0], "Salary") | |
top := []int{} | |
for _, line := range lines[1:] { | |
v, err := strconv.Atoi(line[col]) | |
if err != nil { | |
return nil, fmt.Errorf("atoi | %v", err) | |
} | |
top = append(top, v) | |
} | |
sort.Sort(sort.Reverse(sort.IntSlice(top))) | |
if len(top) > 10 { | |
top = top[0:10] | |
} | |
return top, nil | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use bigdecimal::BigDecimal; | |
use serde::Deserialize; | |
use std::collections::BinaryHeap; | |
use std::cmp::Reverse; | |
const MAX: usize = 10; | |
#[derive(Debug, Deserialize)] | |
#[serde(rename_all(deserialize = "PascalCase"))] | |
struct Record { | |
salary: BigDecimal, | |
} | |
fn main() -> Result<(), anyhow::Error> { | |
let mut heap = BinaryHeap::with_capacity(MAX + 1); | |
let mut rdr = csv::Reader::from_reader(std::io::stdin()); | |
for result in rdr.deserialize() { | |
let record: Record = result?; | |
heap.push(Reverse(record.salary)); | |
if heap.len() > MAX { | |
heap.pop(); | |
} | |
} | |
let vec = heap.into_sorted_vec(); | |
eprintln!("{:#?}", vec); | |
Ok(()) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Print the ten highest values of the "Salary" column of the CSV file at $path.

// file() returns the lines as an array (file_get_contents would return one string).
$csv = @file ($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($csv === false || $csv === []) {
    die ('Cannot read CSV file');
}

// array_shift removes and returns the header line.
$headings = explode (',', array_shift ($csv));

// Strict comparison with false: column 0 is a valid position but is falsy.
$salary_ord = array_search ('Salary', $headings, true);
if ($salary_ord === false) {
    die ('Salary column not found');
}

$salaries = [];
foreach ($csv as $csv_line) {
    $line_values = explode (',', $csv_line);
    // Skip rows that are too short to contain the salary column.
    if (isset ($line_values[$salary_ord])) {
        $salaries[] = $line_values[$salary_ord];
    }
}

rsort ($salaries, SORT_NUMERIC);
$top_salaries = array_slice ($salaries, 0, 10);
echo implode ("\n", $top_salaries);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print the ten highest values of the "Salary" column.
# Line 1 is split on the word "Salary", so $1 holds everything before that header;
# the number of commas in it, plus one, is the column index. FS is then switched
# to "," for the data rows.
awk -FSalary 'NR==1{i=gsub(",",0,$1)+1;FS=","} NR>1{print($i)}' /path/to/csv.csv \
  | sort -rn \
  | head -n10
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print the ten largest values of the "salary" column of a ';'-separated file,
# in ascending order.
with open("table.tsv", "r") as f:
    header = next(f).rstrip().split(";")
    sal_col = header.index("salary")
    salaries = [int(line.rstrip().split(";")[sal_col]) for line in f]
    salaries.sort()
    print(salaries[-10:])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print the ten largest values of the "salary" column of a ';'-separated file,
# highest first. The file is opened once and closed by the `with` block.
with open("table.tsv") as f:
    sal_col = next(f).rstrip().split(";").index("salary")
    # After the header has been consumed, `f` yields only the data rows.
    print(sorted((int(x.rstrip().split(";")[sal_col]) for x in f), reverse=True)[:10])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs') | |
// Read the CSV at `path` and compute the ten highest values of its "Salary" column.
fs.readFile(path, 'utf8', (err, data) => {
  if (err) throw err
  // Drop blank lines (for example the one after a trailing newline): they would
  // otherwise become NaN salaries.
  const [header = '', ...rows] = data.split('\n').filter(line => line.trim() !== '')
  let col = header.split(',').indexOf('Salary')
  let salaries = rows.map(line => Number(line.split(',')[col]))
  // Without a comparator, sort() compares elements as strings ("9" > "100").
  let top10 = salaries.sort((a, b) => b - a).slice(0, 10)
})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
perl -MText::CSV=csv -le 'print join "\n", (map { $_->{Salary} } sort { $b->{Salary} <=> $a->{Salary} } @{csv (in => $ARGV[0], headers => "auto")})[0..9]' path |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv' | |
CSV.table('path/to/data.csv')[:salary].max(10) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Lazily split every line of the file into cells; the sequence is enumerated twice
// (once for the header, once for the data rows).
var splitLines = File.ReadLines(fn).Select(l => l.Split(','));
// Position of the "salary" column in the header row.
var idx = Array.IndexOf(splitLines.First(), "salary");
// Sum of the ten highest salaries. The LINQ operator is OrderByDescending;
// there is no OrderByDesc.
var salary = splitLines.Skip(1)
    .Select(l => int.Parse(l[idx]))
    .OrderByDescending(s => s)
    .Take(10)
    .Sum();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
jq -nR '[inputs | split(",")] | [.[1:][][first | index("salary")] | tonumber] | sort | reverse[:10][]' salaries.csv |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::cmp::Reverse; | |
use std::collections::BinaryHeap; | |
use std::fs::File; | |
use std::io::{prelude::*, BufReader}; | |
use std::str; | |
/// Returns up to ten of the largest values of the `Salary` column of a CSV file,
/// in descending order.
///
/// `None` is returned when the file cannot be opened, has no header line, or the
/// header has no `Salary` column. Rows that cannot be read, are too short, or whose
/// salary is not a valid `u64` are skipped.
fn best_salaries(filename: &str) -> Option<Vec<u64>> {
    let mut rows = BufReader::new(File::open(filename).ok()?).split(b'\n');
    let header = rows.next()?.ok()?;
    let salary_pos = header
        .split(|byte| *byte == b',')
        .position(|name| name == b"Salary")?;

    // Min-heap (through `Reverse`) holding the ten largest salaries seen so far.
    let mut heap = BinaryHeap::new();
    for row in rows {
        let salary = row.ok().and_then(|bytes| {
            let cell = bytes.split(|byte| *byte == b',').nth(salary_pos)?;
            str::from_utf8(cell).ok()?.parse::<u64>().ok()
        });
        if let Some(salary) = salary {
            heap.push(Reverse(salary));
            if heap.len() > 10 {
                // Evicts the smallest of the eleven candidates.
                heap.pop();
            }
        }
    }

    let mut top: Vec<u64> = heap.into_iter().map(|Reverse(salary)| salary).collect();
    top.sort_unstable_by(|a, b| b.cmp(a));
    Some(top)
}
fn main() { | |
dbg!(best_salaries("a.csv")); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Returns up to ten of the largest values of the `Salary` column of the CSV file
/// `fname`, in descending order. Empty lines are ignored.
///
/// Panics if the file cannot be read, contains no non-empty line, has no `Salary`
/// column, or has a row whose salary cell is missing or not a valid `u64`.
fn total_salary(fname: &str) -> Vec<u64> {
    let contents = std::fs::read_to_string(fname).unwrap();
    let mut rows = contents
        .split("\n")
        .filter(|line| !line.is_empty())
        .map(|line| line.split(",").collect::<Vec<_>>());

    let header = rows.next().unwrap();
    let column = header.iter().position(|&name| name == "Salary").unwrap();

    let mut salaries: Vec<u64> = rows
        .map(|cells| cells[column].parse::<u64>().unwrap())
        .collect();
    salaries.sort_unstable_by(|a, b| b.cmp(a));
    salaries.truncate(10);
    salaries
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
top10Salaries :: FilePath -> IO () | |
top10Salaries path = do | |
Just (h, t) <- uncons . T.lines <$> T.readFile path | |
let | |
split = T.splitOn "," | |
Just ind = elemIndex "Salary" $ split h | |
top10 :: [Int] = t | |
& map (\s -> read $ T.unpack $ split s !! ind) | |
& sortBy (flip compare) | |
& take 10 | |
forM_ top10 print |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.io.Source | |
/** Returns up to ten of the largest values of the "Salary" column of a CSV file,
  * in descending order.
  *
  * Blank lines are ignored and an empty file yields an empty list.
  *
  * @param path path to a comma-separated file whose first line is the header
  * @return the highest salaries, largest first (at most ten)
  * @throws IllegalArgumentException if the header has no "Salary" column
  * @throws NumberFormatException if a salary cell is not a number
  */
def topTenSalaries(path: String): List[Double] = {
  val source = Source.fromFile(path)
  // Read everything eagerly so the underlying file handle can be closed right away.
  val lines =
    try source.getLines().filter(_.trim.nonEmpty).toList
    finally source.close()

  lines match {
    case Nil => Nil
    case header :: body =>
      val column = header.split(",").indexOf("Salary")
      require(column >= 0, s"""No "Salary" column in the header of $path""")
      body
        .map(_.split(",")(column).toDouble)
        .sorted(Ordering[Double].reverse)
        .take(10)
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using CSV, DataFrames | |
df = CSV.read("foo.csv", DataFrame, delim=",") | |
println(first(sort!(df.Salary, rev=true),10)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import <Foundation/Foundation.h> | |
// Prints the ten highest values of the "Salary" column of the CSV file given as the
// only command-line argument, highest first. Returns 1 on any usage or input error.
int main(int argc, char **argv)
{
    if (argc != 2) {
        return 1;
    }

    NSString *inputPath = @(argv[1]);
    NSError *error = nil;
    NSString *inputString = [NSString stringWithContentsOfFile:inputPath encoding:NSUTF8StringEncoding error:&error];
    // Cocoa convention: failure is signalled by the nil return value, not by the NSError.
    if (inputString == nil) {
        return 1;
    }

    NSArray<NSString *> *lines = [inputString componentsSeparatedByString:@"\n"];
    const NSUInteger lineCount = lines.count;
    if (lineCount < 2) {
        return 1;
    }

    NSArray<NSString *> *columnNames = [lines[0] componentsSeparatedByString:@","];
    const NSUInteger salaryColumnIndex = [columnNames indexOfObject:@"Salary"];
    if (salaryColumnIndex == NSNotFound) {
        return 1;
    }

    // Kept sorted in descending order and capped at ten entries.
    NSMutableArray<NSNumber *> *topSalaries = [NSMutableArray new];
    for (NSUInteger lineIndex = 1; lineIndex < lineCount; lineIndex++) {
        NSString *line = lines[lineIndex];
        if (line.length == 0) {
            continue;
        }

        NSArray<NSString *> *values = [line componentsSeparatedByString:@","];
        if (values.count <= salaryColumnIndex) {
            return 1;
        }

        const NSInteger salary = values[salaryColumnIndex].integerValue;

        // Binary search for the first position whose value is smaller than `salary`.
        // Equal values move the lower bound too, so the insertion index is always
        // valid and the array stays sorted when duplicates occur.
        NSUInteger lo = 0;
        NSUInteger hi = topSalaries.count;
        while (lo < hi) {
            const NSUInteger mid = lo + (hi - lo) / 2;
            if (topSalaries[mid].integerValue >= salary) {
                lo = mid + 1;
            } else {
                hi = mid;
            }
        }

        [topSalaries insertObject:@(salary) atIndex:lo];
        if (topSalaries.count > 10) {
            [topSalaries removeLastObject];
        }
    }

    for (NSUInteger idx = 0; idx < topSalaries.count; idx++) {
        NSLog(@"%lu: %ld", idx + 1, topSalaries[idx].integerValue);
    }

    return 0;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv' | |
# Print the ten highest values of the "Salary" column of foo.csv, highest first.
top_salaries = CSV.foreach('foo.csv', headers: true)
                  .map { |row| row['Salary'].to_f }
                  .max(10)
top_salaries.each { |salary| puts salary }
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$csv = array_map('str_getcsv', file("data.csv")); | |
$i = array_search("Salary", $csv[0]); | |
array_walk($csv, function(&$row) use ($i) { $row = $row[$i]; }); | |
array_shift($csv); | |
arsort($csv); | |
$sortedSalary = array_slice($csv, 0, 10); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Reads a CSV file and resolves with the ten highest values of its `Salary` column.
 *
 * @param {string} path - Path to a comma-separated file whose first line is the header.
 * @returns {Promise<number[]>} Up to ten salaries, highest first; an empty array for
 *   an empty file. Rejects if the file cannot be read or has no `Salary` column.
 */
function totalSalary(path) {
  return require('fs/promises')
    // Decode as text: without an encoding readFile yields a Buffer, which has no split().
    .readFile(path, 'utf8')
    .then(content => {
      const lines = content.split(/\r?\n/).filter(line => line.trim() !== '');
      if (lines.length === 0) {
        return [];
      }
      const column = lines[0].split(',').indexOf('Salary');
      if (column === -1) {
        throw new Error('Salary column not found');
      }
      return lines
        .slice(1)
        .map(line => Number(line.split(',')[column]))
        .sort((a, b) => b - a) // descending, so the largest values come first
        .slice(0, 10);
    });
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sqlite> .import --csv salaries.csv salaries
sqlite> select * from salaries order by cast(salary as real) desc limit 10; -- imported columns are TEXT, so cast to sort numerically
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Char (isSpace) | |
import Data.List (elemIndex, sort) | |
import Data.List.Split (splitOn) | |
topSalaries :: String -> IO (Maybe [Int]) | |
topSalaries path = do | |
head:rows <- lines <$> readFile path | |
return $ do | |
salaryIdx <- ("Salary" `elemIndex`) . splitOn "," $ head | |
return | |
. take 10 . reverse . sort | |
. map (read . (!! salaryIdx) . splitOn ",") | |
$ rows | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Returns up to ten cells of the "Salary" column of the CSV file at `path`, ordered
# from the highest to the lowest numeric value. Cells are returned as the original
# strings; rows whose salary cell is missing or not a number are skipped.
def total_salary(path) do
  [head | tail] =
    path
    |> File.read!()
    # trim: true drops the empty string produced by a trailing newline
    |> String.split("\n", trim: true)
    |> Enum.map(fn line -> String.split(line, ",") end)

  col = Enum.find_index(head, fn name -> name == "Salary" end)

  tail
  |> Enum.flat_map(fn row ->
    with cell when is_binary(cell) <- Enum.at(row, col),
         {value, _rest} <- Float.parse(cell) do
      [{value, cell}]
    else
      _ -> []
    end
  end)
  # Compare the parsed numbers: sorting the raw strings would put "9" above "100".
  |> Enum.sort_by(fn {value, _cell} -> value end, &>=/2)
  |> Enum.take(10)
  |> Enum.map(fn {_value, cell} -> cell end)
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv' | |
salaries = CSV.parse(File.read('./salaries.csv'), headers: true) | |
p salaries['Salary'].map(&:to_f).max(10) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn total-salary [path] | |
(with-open [rdr (io/reader path)] | |
(let [[header & body] (line-seq rdr) | |
col (.indexOf | |
(str/split header #",") | |
"Salary")] | |
(->> body | |
(map #(str/split % #",")) | |
(map #(nth % col)) | |
(map #(Double/parseDouble %)) | |
(sort) | |
(reverse) | |
(take 10))))) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn top-10-salaries [path] | |
(with-open [rdr (io/reader path)] | |
(let [[header & body] (line-seq rdr) | |
col (.indexOf (str/split header #",") "Salary")] | |
(reduce | |
(fn [heap line] | |
(let [salary (-> line (str/split #",") (nth col) (Double/parseDouble))] | |
(cond | |
(< (count heap) 10) (doto heap (.add salary)) | |
(> salary (.peek heap)) (doto heap (.poll) (.add salary)) | |
:else heap))) | |
(java.util.PriorityQueue. 10) body)))) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
pd.read_csv("/../you_path")['Salary'].sort_values(ascending=False)[:10] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
&AtServerNoContext | |
Function Top10Salaries(FilePath) | |
TextReader = New TextReader(FilePath); | |
Salaries = New ValueList; | |
Try | |
String = TextReader.ReadLine(); | |
LineNumber = 1; | |
ColumnIndex = StrSplit(String, ",").Find("Salary"); | |
While String <> Undefined Do | |
If LineNumber > 1 Then | |
Salary = StrSplit(String, ",")[ColumnIndex]; | |
Salaries.Add(Salary); | |
EndIf; | |
LineNumber = LineNumber + 1; | |
String = TextReader.ReadLine(); | |
EndDo; | |
Salaries.SortByValue(SortDirection.Desc); | |
While Salaries.Count() > 10 Do | |
Salaries.Delete(Salaries.Count() - 1); | |
EndDo; | |
Except | |
TextReader.Close(); | |
EndTry; | |
Return Salaries; | |
EndFunction |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Reads a CSV file and returns up to ten of the largest values of its "Salary"
/// column, in descending order. Blank lines are ignored.
/// </summary>
/// <param name="filePath">Path to a comma-separated file whose first line is the header.</param>
/// <returns>The highest salaries, largest first; empty for an empty file.</returns>
/// <exception cref="InvalidDataException">The header has no "Salary" column.</exception>
/// <exception cref="FormatException">A salary cell is not a number.</exception>
public static async Task<IEnumerable<double>> TotalSalary(string filePath)
{
    var rows = (await File.ReadAllLinesAsync(filePath))
        .Where(line => !string.IsNullOrWhiteSpace(line))
        .Select(line => line.Split(','))
        .ToArray();
    if (rows.Length == 0)
    {
        return Enumerable.Empty<double>();
    }

    var salaryIndex = Array.IndexOf(rows[0], "Salary");
    if (salaryIndex < 0)
    {
        throw new InvalidDataException("Salary column not found");
    }

    // Parse with the invariant culture: the current culture may treat '.' as a
    // thousands separator and silently misread "1234.5".
    // ToList materializes the result so parse errors surface when the task is awaited.
    return rows
        .Skip(1)
        .Select(cells => double.Parse(cells[salaryIndex], System.Globalization.CultureInfo.InvariantCulture))
        .OrderByDescending(salary => salary)
        .Take(10)
        .ToList();
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Salary<10$#^%&#$%&#$%dat.csv |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Is it worth mentioning that `sort.take(10)` and `max(10)` aren't exactly of the same complexity? Sorting suggests O(n log n) time complexity and O(n) space complexity, while `max` can handle the same in O(n) time and O(1) space, with just one iteration over the set and a bit more than 10 memory slots needed. There is a data structure I forgot the name of that is O(1) efficient for keeping M max values (10 in our case).
I have no certainty that even languages with inherently lazy sorting can optimize the subsequent picking of 10 elements to use an approach similar to what `max` does.