Created
September 17, 2014 07:31
-
-
Save ewels/a027fcb36664ce964a1b to your computer and use it in GitHub Desktop.
Cytoband Coordinate Converter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use CGI::Carp qw(fatalsToBrowser); # pipe errors to the browser instead of the terminal for debugging | |
use warnings; | |
use strict; | |
use Data::Dumper; | |
#print Dumper(\%args); | |
print "Content-type: text/plain\n\n"; | |
############################################################# | |
# Name: Cytobands - Coordinates: Online Version # | |
# Author: Phil Ewels # | |
# Version 1.0 - 27/03/2012 # | |
# --------------------------------------------------------- # | |
# Converts a list of cytogenetic bands to genome # | |
# coordinates (and visa versa?) # | |
# --------------------------------------------------------- # | |
# Cytobands - Coordinates is licensed under a Creative # | |
# Commons Attribution-ShareAlike 3.0 Unported License. # | |
# Based on a work at www.tallphil.co.uk. # | |
############################################################# | |
#loads CGI with standard shortcuts | |
use CGI qw/:standard/; | |
# Setup - input from web interface | |
my $cytobands_input = param('cytobands') || ''; | |
my $genome = param('genome') || ''; | |
if($cytobands_input eq ''){ | |
# no variables - redirect to form | |
print redirect('index.html'); | |
exit; | |
} | |
# clean data to stop any nasties and set a default | |
if($genome eq '' || ($genome ne 'hg19' && $genome ne 'mm9')){ | |
$genome = 'hg19'; | |
} | |
my $i; | |
######## | |
# Parse cytobands | |
######## | |
my @input_lines = split(/\n/, $cytobands_input); | |
my @input; | |
warn "\n"; | |
foreach (@input_lines) { | |
# look for p or q | |
my $match; | |
if($genome eq 'mm9'){ | |
$match = '^\D*(\d+)\s*([abcdefghpa])(([abcdefgh])?\d+(\.\d+)?)\D*$'; | |
} else { | |
$match = '^\D*(\d+)\s*([pq])(\d+(\.\d+)?)\D*$'; | |
} | |
#print "input = $_\nmatch = $match\n"; | |
$_ =~ m/$match/i; | |
my $before = $1; # anything that came before the match | |
my $pq = $2; # the match | |
my $after = $3; # anything after the match | |
#print "before = $before\npq = $pq\nafter = $after\n"; | |
if($pq =~ m/[A-H]/i && $genome eq 'mm9'){ | |
$after = $pq.$after; | |
$pq = 'q'; | |
} | |
# pull out relevant chunks | |
my ($chr) = $before =~ m/(\d+)\s*$/; | |
my $band; | |
if($genome eq 'mm9'){ | |
($band) = $after =~ m/\s*([\dA-H]+(\.\d+)?)/i; | |
} else { | |
($band) = $after =~ m/\s*(\d+(\.\d+)?)/; | |
} | |
#print "chr = $chr\nband=$band\n"; | |
# check and write to array | |
if($chr ne '' && $pq ne '' && $band ne ''){ | |
push @input, {'chr' => $chr, 'pq' => $pq, 'band' => $band, 'type' => 'cytoband'}; | |
warn "Interpreted $chr$pq$band from: ".$_." \n"; | |
} else { | |
### not a cytoband, is it a coordinate? | |
$_ =~ s/(:|-)/ /g; # swap common separaters with whitespace | |
$_ =~ s/[^\d\sXY]+//g; # remove anything that's not a number or whitespace, or X or Y | |
my @sections = split(/\s/); | |
my $numsections = @sections; | |
if($numsections eq 3) { | |
$chr = $sections[0]; | |
my $start = $sections[1]; | |
my $end = $sections[2]; | |
push @input, {'chr' => $chr, 'start' => $start, 'end' => $end, 'type' => 'coordinate'}; | |
warn "Interpreted $chr:$start-$end from: ".$_." \n"; | |
} else { | |
warn "Couldn't understand line: ".$_."\n"; | |
} | |
} | |
} | |
warn "\n"; | |
my $numinput = @input; | |
if($numinput == 0){ | |
print "Error - No input lines recognised."; | |
exit; | |
} | |
######## | |
# Convert parsed cytobands to co-ordinates | |
######## | |
my @conversions; | |
my $referencefile = "cytoBand_$genome.txt"; | |
open (IN,$referencefile) or die "Can't read file $referencefile: $!"; | |
while (<IN>) { | |
chomp; | |
my @sections = split(/\t/); | |
#warn $sections[0]."\t".$sections[3]."\t".$sections[1]."\t".$sections[2]."\n"; | |
for ($i=0; $i<@input; $i++) { | |
my $type = $input[$i]->{type}; | |
my $chr = $input[$i]->{chr}; | |
if($type eq 'cytoband'){ | |
my $pq = $input[$i]->{pq}; | |
my $band = $input[$i]->{band}; | |
# same chr? | |
if("chr$chr" eq $sections[0]){ | |
# same arm? | |
if ($sections[3] =~ /($pq)/){ | |
#start with same band? | |
my $pqband = $pq.$band; | |
if($sections[3] =~ /($pqband)/){ | |
my $start = $sections[1]; | |
my $end = $sections[2]; | |
# Do we already have a match for this? | |
if(exists($conversions[$i])){ | |
#warn "chr$chr$pq$band\t".$sections[0].$sections[3]."\texists\t$chr:$start-$end\n"; | |
if($conversions[$i]{'start'} > $start){ | |
$conversions[$i]{'start'} = $start; | |
} | |
if($conversions[$i]{'end'} < $end){ | |
$conversions[$i]{'end'} = $end; | |
} | |
} else { | |
#warn "chr$chr$pq$band\t".$sections[0].$sections[3]."\tnew\t$chr:$start-$end\n"; | |
if($genome eq 'mm9'){ | |
$conversions[$i]{'input'} = "chr$chr$band"; | |
} else { | |
$conversions[$i]{'input'} = "chr$chr$pq$band"; | |
} | |
$conversions[$i]{'chr'} = $chr; | |
$conversions[$i]{'start'} = $start; | |
$conversions[$i]{'end'} = $end; | |
$conversions[$i]{'type'} = 'coordinate'; | |
} | |
} | |
} | |
} | |
} elsif($type eq 'coordinate') { | |
my $start = $input[$i]->{start}; | |
my $end = $input[$i]->{end}; | |
# same chr? | |
if("chr$chr" eq $sections[0]){ | |
# in this window? | |
if($start ge $sections[1] && $end le $sections[2]){ | |
my $cytoband = $sections[3]; | |
$conversions[$i]{'cytoband'} = $chr.$cytoband; | |
$conversions[$i]{'input'} = "$chr:$start-$end"; | |
$conversions[$i]{'type'} = 'cytoband'; | |
} | |
} | |
} | |
} | |
} | |
my $numconversions = @conversions; | |
if($numconversions == 0){ | |
print "Error - No conversions found. $numinput input lines parsed:\n\n".join("\n", Dumper(@input)); | |
exit; | |
} | |
######## | |
# Output results | |
######## | |
foreach my $conversion ( @conversions) { | |
if($conversion->{type} eq 'coordinate'){ | |
print $conversion->{chr}, "\t"; | |
print $conversion->{start}, "\t"; | |
print $conversion->{end}, "\t"; | |
print $conversion->{input}, "\n"; | |
} elsif($conversion->{type} eq 'cytoband'){ | |
print $conversion->{cytoband}, "\t"; | |
print $conversion->{input}, "\n"; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> | |
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US"> | |
<head> | |
<title>Cytoband Coordinate Converter</title> | |
<meta name="keywords" content="Phil Ewels, Cytoband Converter, Cytogenetic Bands, Chromosomal Position, Bioinformatics" /> | |
<meta name="copyright" content="copyright 2012 Phil Ewels" /> | |
<link rel="stylesheet" type="text/css" href="styles.css" /> | |
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" /> | |
</head> | |
<body> | |
<h1>Cytogenetic Band : Genome Coordinate Converter</h1> | |
<p>Enter a list of either cytogenetic bands or genomic co-ordinates, and this script will convert one to the other.</p> | |
<form method="post" action="cytobands.pl" enctype="multipart/form-data"> | |
<p><label>Species: <select name="genome"><option value="hg19">Human (hg19)</option><option value="mm9">Mouse (mm9)</option></select></label></p> | |
<p><label>Cytogenetic bands in the format <strong><em>[chr][arm][band]</em></strong> or coordinate in the format <strong><em>[chr]:[start]-[end]</em></strong><br /><br /> | |
<small><em>For example: 10q26.11 or 10:129342-234987</em></small><br /><br /> | |
<textarea name="cytobands"></textarea></label></p> | |
<p><input type="submit" value="Submit"></p> | |
</form> | |
<p>Conversions are made using the UCSC chromosome band map, available through the <a href="http://genome.ucsc.edu/cgi-bin/hgTables" target="_blank">UCSC table browser</a>.</p> | |
<p>Please note that this map is <strong>not</strong> identical to that used by <em>ensembl</em>.</p> | |
<p class="licence"><a rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/"><img alt="Creative Commons Licence" style="border-width:0" src="http://i.creativecommons.org/l/by-sa/3.0/88x31.png" /></a><br /> | |
<span xmlns:dct="http://purl.org/dc/terms/" href="http://purl.org/dc/dcmitype/InteractiveResource" property="dct:title" rel="dct:type">Cytoband Converter</span> by <a xmlns:cc="http://creativecommons.org/ns#" href="http://phil.ewels.co.uk" property="cc:attributionName" rel="cc:attributionURL">Phil Ewels</a> is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/">Creative Commons Attribution-ShareAlike 3.0 Unported License</a>.<br />Based on a work at <a xmlns:dct="http://purl.org/dc/terms/" href="http://www.tallphil.co.uk/cytoband/" rel="dct:source">www.tallphil.co.uk</a>.</p> | |
<!-- Google Analytics Code --> | |
<script type="text/javascript"> | |
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www."); | |
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E")); | |
try { var pageTracker = _gat._getTracker("UA-10500167-1"); pageTracker._trackPageview(); } catch(err) {} | |
</script> | |
</body> | |
</html> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* CSS Styles for cytobands.pl */ | |
body { | |
margin:0; | |
padding:20px 40px; | |
border-top:4px solid #00FBFF; | |
color: #2d2d2d; | |
font-family: sans-serif; | |
font-size:12pt; | |
} | |
textarea { | |
min-width:600px; | |
width:50%; | |
height:300px; | |
} | |
input { | |
font-size:x-large; | |
padding:10px 15px; | |
} | |
.monospace { | |
font-family:monospace; | |
} | |
.licence { | |
color:#999; | |
font-size:8pt; | |
margin-top:20px; | |
padding-top:20px; | |
border-top:1px solid #ccc; | |
} | |
.licence a { | |
color:#999; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment