Forked from talawahtech/extreme-benchmark-environment.yaml
Created
April 29, 2023 20:59
Revisions
-
talawahtech revised this gist
May 21, 2021. No changes. There are no files selected for viewing.
-
talawahtech revised this gist
May 21, 2021. No changes. There are no files selected for viewing.
-
talawahtech revised this gist
May 21, 2021. No changes. There are no files selected for viewing.
-
talawahtech created this gist
May 20, 2021. There are no files selected for viewing.
# CloudFormation template: Extreme Performance Tuning Benchmark Environment.
#
# Launches two EC2 instances (a c5n.4xlarge "client" and a c5n.xlarge "server")
# from one shared launch template, placed in a cluster placement group.
# The launch template's user data is a MIME multipart document with two parts:
#   1. a cloud-config part (reboot-on-completion, per-boot NIC/IRQ tuning, packages)
#   2. a bash script part (sysctls, docker, twrk, libreactor, FlameGraph, grub, auditd)
#
# NOTE: the UserData body is passed through !Sub. Inside it, ${AWS::...} is
# substituted by CloudFormation, ${!NAME} yields a literal ${NAME} for bash, and
# ${!!NAME[@]} yields a literal ${!NAME[@]}. Any text added inside that block
# becomes part of the user data sent to the instance.
AWSTemplateFormatVersion: '2010-09-09'
Description: Extreme Performance Tuning Benchmark Environment

Parameters:
  # Resolved from the public SSM parameter at deploy time
  AmiId:
    Type: AWS::SSM::Parameter::Value<AWS::EC2::Image::Id>
    Default: '/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2'
  InstanceKeyPair:
    Type: AWS::EC2::KeyPair::KeyName
  InstanceSecurityGroup:
    Type: AWS::EC2::SecurityGroup::Id
  InstanceSubnet:
    Type: AWS::EC2::Subnet::Id
  # Used as the Ebs VolumeSize of the /dev/xvda mapping in the launch template
  InstanceVolumeSize:
    Type: Number
    Default: 8

Resources:
  Client:
    Type: AWS::EC2::Instance
    Properties:
      InstanceType: 'c5n.4xlarge'
      Tags:
        - Key: 'Name'
          Value: 'extreme-client'
        - Key: 'Role' # Used by cloud-init script to conditionally apply changes to only the client or server
          Value: 'client'
      LaunchTemplate:
        LaunchTemplateId: !Ref 'LaunchTemplate'
        Version: !GetAtt 'LaunchTemplate.LatestVersionNumber'

  Server:
    Type: AWS::EC2::Instance
    Properties:
      InstanceType: 'c5n.xlarge'
      Tags:
        - Key: 'Name'
          Value: 'extreme-server'
        - Key: 'Role' # Used by cloud-init script to conditionally apply changes to only the client or server
          Value: 'server'
      LaunchTemplate:
        LaunchTemplateId: !Ref 'LaunchTemplate'
        Version: !GetAtt 'LaunchTemplate.LatestVersionNumber'

  ClusterPlacementGroup:
    Type: AWS::EC2::PlacementGroup
    Properties:
      Strategy: cluster

  # Allows 'aws ec2 describe-tags' to be called from the cloud-init script so it can differentiate client from server
  Ec2Role:
    Type: AWS::IAM::Role
    Properties:
      Path: /
      Policies:
        # Despite its name, this inline policy grants only ec2:DescribeTags
        - PolicyName: 'AllowInstanceLogs'
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action: [ 'ec2:DescribeTags' ]
                Resource: '*'
      AssumeRolePolicyDocument:
        Statement:
          - Effect: Allow
            Principal:
              Service: ['ec2.amazonaws.com']
            Action: ['sts:AssumeRole']

  Ec2InstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      Path: /
      Roles: [!Ref 'Ec2Role']

  LaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateName: !Ref 'AWS::StackName'
      LaunchTemplateData:
        ImageId: !Ref 'AmiId'
        KeyName: !Ref 'InstanceKeyPair'
        IamInstanceProfile:
          Arn: !GetAtt 'Ec2InstanceProfile.Arn'
        Placement:
          GroupName: !Ref 'ClusterPlacementGroup'
        NetworkInterfaces:
          - DeviceIndex: 0
            Ipv6AddressCount: 0 # Ensure that we don't get assigned any IPv6 addresses, even if it is the default for the subnet
            SubnetId: !Ref 'InstanceSubnet'
            Groups:
              - !Ref 'InstanceSecurityGroup'
        BlockDeviceMappings:
          - DeviceName: '/dev/xvda'
            Ebs:
              VolumeSize: !Ref 'InstanceVolumeSize'
              VolumeType: 'gp3'
        UserData:
          Fn::Base64: !Sub |
            Content-Type: multipart/mixed; boundary="==BOUNDARY=="
            MIME-Version: 1.0

            --==BOUNDARY==
            Content-Type: text/cloud-config; charset="us-ascii"
            Content-Disposition: attachment; filename="cloud-config.txt"

            # Automatically reboot after cloud-init completes to apply kernel param changes
            power_state:
              mode: reboot
              message: Rebooting to apply new kernel params
              timeout: 10
              condition: True

            bootcmd:
              # These commands run on every boot, not just the first boot

              #### Disable iptables
              - modprobe -rv ip_tables

              ##### ENA driver configuration. Disable generic receive offloading
              - ethtool -K eth0 gro off

              ##### ENA driver configuration. Enable adaptive IRQ coalescing (server only)
              - export INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
              - echo INSTANCE_ID = ${!INSTANCE_ID}
              - export INSTANCE_ROLE=$(aws ec2 describe-tags --region ${AWS::Region} --filters "Name=resource-id,Values=${!INSTANCE_ID}" "Name=key,Values=Role" --output text | cut -f5)
              - echo INSTANCE_ROLE = ${!INSTANCE_ROLE}
              - if [ "${!INSTANCE_ROLE}" == "server" ]; then ethtool -C eth0 adaptive-rx on; fi
              - if [ "${!INSTANCE_ROLE}" == "server" ]; then ethtool -C eth0 tx-usecs 256; fi

              ##### Disable irqbalance and fix IRQs to cpus. Assumes # of irqs/queues = # of cpus!!!
              ## Note ${!} is the CF escape sequence for the bash equivalent and ${!!} is needed to get a literal ${!}
              ## sleep to give irqbalance time to shutdown before manually setting the values
              - systemctl stop irqbalance.service
              - echo sleeping
              - sleep 5
              - export IRQS=($(grep eth0 /proc/interrupts | awk '{print $1}' | tr -d :))
              - for i in ${!!IRQS[@]}; do echo $i > /proc/irq/${!IRQS[i]}/smp_affinity_list; done;
              - echo irq affinity
              - for i in ${!!IRQS[@]}; do cat /proc/irq/${!IRQS[i]}/smp_affinity_list; done;

              ##### Setup Transmit Packet Steering (XPS) to map queue x to cpu x for outgoing packets. Assumes # of queues = # of cpus!!!
              ## A hex bitmap is used in this case, not the cpu id so we raise 2 to the power of i and convert it to hex
              ## Note ${!} is the CF escape sequence for the bash equivalent and ${!!} is needed to get a literal ${!}
              - export TXQUEUES=($(ls -1qdv /sys/class/net/eth0/queues/tx-*))
              - for i in ${!!TXQUEUES[@]}; do printf '%x' $((2**i)) > ${!TXQUEUES[i]}/xps_cpus; done;
              - echo 'xps_cpus'
              - for i in ${!!TXQUEUES[@]}; do cat ${!TXQUEUES[i]}/xps_cpus; done;

              ## Stop dhclient and set address lifetime to "forever"
              - dhclient -x -pf /var/run/dhclient-eth0.pid
              - dhclient -x -pf /var/run/dhclient6-eth0.pid
              - ip addr change $( ip -4 addr show dev eth0 | grep 'inet' | awk '{ print $2 " brd " $4 " scope global"}') dev eth0 valid_lft forever preferred_lft forever

            packages:
              - git
              - gcc
              - make
              - htop
              - iperf3
              - dstat
              - pcp-system-tools
              - perf
              - iproute-tc

            --==BOUNDARY==
            Content-Type: text/x-shellscript; charset="us-ascii"
            Content-Disposition: attachment; filename="user-data-script.txt"

            #!/bin/bash

            # Configure sysctls
            cat > /etc/sysctl.d/90-extreme.conf <<- EOF
            vm.swappiness=0
            vm.dirty_ratio=80
            net.core.somaxconn=2048
            net.ipv4.tcp_max_syn_backlog=10000
            net.core.busy_poll=1
            net.core.default_qdisc=noqueue
            net.ipv4.tcp_congestion_control=reno
            EOF

            # Reload sysctl to pick up new configs
            # (--system reads /etc/sysctl.d/*.conf; a bare 'sysctl -p' only reads /etc/sysctl.conf)
            sysctl --system

            # Disable ssm agent. It doesn't really affect throughput, but any network activity can affect p99 and stdev for latency
            systemctl stop amazon-ssm-agent
            systemctl disable amazon-ssm-agent

            # Install docker and stress-ng from amazon-linux-extras
            amazon-linux-extras enable -y docker testing
            yum install -y docker stress-ng

            # Add the ec2-user to the docker group so you can execute Docker commands without using sudo
            usermod -a -G docker ec2-user

            # Configure and start docker with iptables support disabled
            mkdir -p /etc/systemd/system/docker.service.d/
            cat > /etc/systemd/system/docker.service.d/startup_options.conf <<- EOF
            [Service]
            ExecStart=
            ExecStart=/usr/bin/dockerd -H fd:// --bridge=none --iptables=false --ip-forward=false --live-restore
            EOF
            systemctl daemon-reload
            systemctl enable docker
            systemctl start docker

            # Build (t)wrk
            # Note that the luajit-devel package comes from the amazon-linux-extras repo for BCC
            amazon-linux-extras enable BCC
            yum clean metadata
            yum install -y openssl11-devel luajit-devel-2.1.0
            cd /home/ec2-user/
            git clone https://github.com/talawahtech/wrk --single-branch --branch twrk twrk
            cd twrk
            make WITH_LUAJIT=/usr WITH_OPENSSL=/usr CFLAGS="-I /usr/include/luajit-2.1"
            mv twrk /usr/local/bin/
            chown -R ec2-user:ec2-user /home/ec2-user/twrk/

            # Build and run the libreactor (round 20) docker container on the server
            cd /home/ec2-user/
            git clone https://github.com/TechEmpower/FrameworkBenchmarks --branch R20 --single-branch
            chown -R ec2-user:ec2-user /home/ec2-user/FrameworkBenchmarks/
            cd FrameworkBenchmarks/frameworks/C/libreactor/
            docker build . -f libreactor.dockerfile --network host -t libreactor
            docker build . -f libreactor-server.dockerfile --network host -t libreactor-server

            # Install Flamegraph tools
            cd /home/ec2-user/
            git clone https://github.com/brendangregg/FlameGraph
            chown -R ec2-user:ec2-user /home/ec2-user/FlameGraph/

            # Download custom palette.map
            wget -q https://gist.githubusercontent.com/talawahtech/b043e2dbf12af746de06b9b86c1a8b80/raw/ -O palette.map
            chown ec2-user:ec2-user /home/ec2-user/palette.map

            # Download network monitor script
            wget -q https://gist.githubusercontent.com/talawahtech/de78601f1201d9586ac19fff420024b8/raw/ -O netmonitor.sh
            chmod a+x netmonitor.sh
            mv netmonitor.sh /usr/local/bin/

            #### Set kernel params to disable speculative execution mitigations. Requires a reboot to take effect, which is handled above
            sed -i 's/^GRUB_CMDLINE_LINUX_DEFAULT="/&nospectre_v1 nospectre_v2 pti=off mds=off tsx_async_abort=off /' /etc/default/grub
            grub2-mkconfig -o /boot/grub2/grub.cfg

            #### Disable syscall auditing (but otherwise leave auditd functioning).
            echo "-a never,task" > /etc/audit/rules.d/disable-syscall-auditing.rules
            /sbin/augenrules --load

            --==BOUNDARY==--