Skip to content

Instantly share code, notes, and snippets.

@gempesaw
Last active October 1, 2015 19:54
Show Gist options
  • Save gempesaw/f9cfe6cfb93e03773bcd to your computer and use it in GitHub Desktop.
Save gempesaw/f9cfe6cfb93e03773bcd to your computer and use it in GitHub Desktop.
use strict;
use warnings;
use List::Util qw/any/;
# Stage 1: derive the working files from the raw production URL list and
# load the list of top-level URL groups into @file.
#
# Reads:  /tmp/prodUrlsByFrequency
# Writes: /tmp/urls   (filtered URLs, one per line, each starting with '/')
#         /tmp/groups (sorted, de-duplicated first path segments)
die 'eh what' unless -e '/tmp/prodUrlsByFrequency';

# Keep everything after the first '/' on each line, drop ajax,
# user-settings and getBooks URLs, then put the leading '/' back.
# The user.*settings pattern is quoted so the shell cannot glob-expand it.
my $build_urls = q{cut -d '/' -f 2- /tmp/prodUrlsByFrequency | grep -i -v ajax | grep -v 'user.*settings' | grep -v getBooks | sed 's/^/\\//' > /tmp/urls};
system($build_urls) == 0
    or die "building /tmp/urls failed (wait status $?)";

# The second '/'-separated field of "/group/..." is the group name.
my $build_groups = q{cut -d '/' -f 2 /tmp/urls | sort | uniq > /tmp/groups};
system($build_groups) == 0
    or die "building /tmp/groups failed (wait status $?)";

# Slurp the group names; each element of @file still carries its newline.
my $filename = '/tmp/groups';
open(my $fh, '<', $filename)
    or die "cannot open $filename for reading: $!";
my (@file) = <$fh>;
close($fh);
# Stage 2: decide which URL groups get a CSV file.
#
# Input:  @file  - raw lines of /tmp/groups (one group name per line).
# Output: @group - selected group names, each prefixed with '/'.

# Groups matched (case-insensitively) against each selected group name
# when the CSV files are written; a match adds "TODO" to the file name.
my @todo_groups = qw/admin askmd assessments register search/;

# Groups that get no CSV at all. Each entry is a *prefix* of the group
# name, so e.g. "health-" excludes every group starting with "health-".
my @skipped_groups = qw/api
conversation
doctor-visit-guides
health-
healthcheck
login
oauth
profanityCheck
quizzes
resetpassword
robots
server-status
slideshows
teams
topics
unsubscribe
voice
whotofollow
widgetsearch
/;

# Escape the names so they are always matched literally, and compile the
# alternation once instead of on every line.
my $skipped_groups = join('|', map { quotemeta($_) } @skipped_groups);
my $skip_prefix = qr/^(?:$skipped_groups)/;

my @group;
for my $line (@file) {
    chomp(my $name = $line);

    # A URL of just "/" yields an empty group name, which sorts first in
    # /tmp/groups. Drop it explicitly rather than blindly discarding the
    # first entry, which would lose a real group whenever no empty name
    # is present.
    next if $name eq '';
    next if $name =~ $skip_prefix;

    push @group, "/$name";
}
# Stage 3: write one CSV file per selected group.
#
# Each file is "$gatling/<group>.csv" (or "<group>TODO.csv" for groups
# matching @todo_groups) and holds a "url" header line followed by every
# line of /tmp/urls that starts with the group's "/<name>" prefix.
my $gatling = '/opt/gatling-load-tests/user-files/data';
my $urls = "/tmp/urls";

# Read the URL list once. Selecting lines in Perl avoids interpolating
# group names, which come from URL data, into a shell command line, where
# a quote or regex metacharacter would change the command's meaning.
open(my $urls_fh, '<', $urls)
    or die "cannot open $urls for reading: $!";
my @url_lines = <$urls_fh>;
close($urls_fh);

foreach my $group (@group) {
    # Literal prefix match: the line must begin with "/<name>".
    my @group_urls = grep { index($_, $group) == 0 } @url_lines;

    # Case-insensitive substring match against the todo list, so a group
    # such as "/research" is tagged as well because it contains "search".
    my $todo = (any { $group =~ /$_/i } @todo_groups) ? 'TODO' : '';

    # $group already starts with '/', hence the doubled slash in the path.
    my $csv_path = "$gatling/$group$todo.csv";
    open(my $out, '>', $csv_path)
        or die "cannot open $csv_path for writing: $!";
    print {$out} "url\n";
    print {$out} @group_urls;
    close($out)
        or die "cannot finish writing $csv_path: $!";
}
# Post-processing of the generated CSV files, done with shell one-liners.
my $user_csv = "$gatling/user.csv";
my $blog_csv = "$gatling/blog.csv";

# Split the blog URLs out of user.csv: blog.csv gets a "url" header plus
# every user.csv line containing "blog"; user.csv keeps the remaining lines.
qx{echo "url" > $blog_csv};
qx{grep blog $user_csv >> $blog_csv};
qx{grep -v blog $user_csv > ${user_csv}temp && mv ${user_csv}temp $user_csv};

# Truncate each URL to its first two path segments, so user.csv and
# group.csv only contain the user/group URLs themselves and not things
# like /group/url/videos or /group/url/answers/search. The awk program
# removes the resulting duplicates without sorting; see
# http://stackoverflow.com/questions/11532157/unix-removing-duplicate-lines-without-sorting
for my $csv ($user_csv, "$gatling/group.csv") {
    qx{cut -d '/' -f -3 $csv | awk '!x[\$0]++' > ${csv}temp && mv ${csv}temp $csv};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment