Skip to content

Instantly share code, notes, and snippets.

Created March 8, 2012 17:57

Revisions

  1. @invalid-email-address Anonymous created this gist Mar 8, 2012.
    46 changes: 46 additions & 0 deletions entrenzgeneid
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,46 @@
    #!/usr/bin/perl
    #
    # For every Entrez Gene ID given on the command line, look up the gene's
    # genomic placement (accession, start, stop) with esummary and then print
    # the GenBank record of that region, padded with flanking sequence on both
    # sides, fetched from the nucleotide database with efetch.
    #
    # Usage: entrenzgeneid gene_id [gene_id ...]
    use strict;
    use warnings;

    use Bio::DB::EUtilities;

    # Number of flanking bases requested on each side of the gene.
    my $flank = 5000;

    # Sub-items of GenomicInfoType that are required to build the efetch query.
    my @wanted = qw(ChrAccVer ChrStart ChrStop);

    # this needs to be a list of EntrezGene unique IDs
    my @ids = @ARGV;
    die "Usage: $0 gene_id [gene_id ...]\n" unless @ids;

    my $eutil = Bio::DB::EUtilities->new(
        -eutil => 'esummary',
        -email => 'mymail@foo.bar',
        -db    => 'gene',
        -id    => \@ids,
    );

    my $fetcher = Bio::DB::EUtilities->new(
        -eutil   => 'efetch',
        -email   => 'mymail@foo.bar',
        -db      => 'nucleotide',
        -rettype => 'gb',
    );

    while (my $docsum = $eutil->next_DocSum) {
        # to ensure we grab the right ChrStart information, we grab the Item above
        # it in the Item hierarchy (visible via print_all from the eutil instance)
        my ($item) = $docsum->get_Items_by_name('GenomicInfoType');
        die "GenomicInfoType not set" unless defined $item;

        # Start from undef (not 0) so that a genuine coordinate of 0 is not
        # mistaken for a missing value by the check below.
        my %item_data = map { $_ => undef } @wanted;

        while (my $sub_item = $item->next_subItem) {
            my $name = $sub_item->get_name;
            $item_data{$name} = $sub_item->get_content
                if exists $item_data{$name};
        }

        # check to make sure everything is set
        for my $check (@wanted) {
            die "$check not set"
                unless defined $item_data{$check} && length $item_data{$check};
        }

        # The coordinates are used in arithmetic below; refuse anything that is
        # not a plain non-negative integer.
        for my $coord (qw(ChrStart ChrStop)) {
            die "$coord is not a non-negative integer: $item_data{$coord}"
                unless $item_data{$coord} =~ /\A\d+\z/;
        }

        my ($chr_start, $chr_stop) = @item_data{qw(ChrStart ChrStop)};

        # A start greater than the stop is taken to mean the minus strand,
        # which is requested from efetch as strand 2.
        my $strand = $chr_start > $chr_stop ? 2 : 1;
        printf("Retrieving %s, from %d-%d, strand %d\n", $item_data{ChrAccVer},
            $chr_start,
            $chr_stop,
            $strand
        );

        # Pad outward from the ordered coordinates. Padding the raw start/stop
        # would shrink (or invert) the window when start > stop.
        my ($low, $high) = $strand == 2
            ? ($chr_stop, $chr_start)
            : ($chr_start, $chr_stop);

        # Genes close to the beginning of the sequence would otherwise yield a
        # zero or negative start position.
        my $seq_start = $low - $flank;
        $seq_start = 1 if $seq_start < 1;
        my $seq_stop = $high + $flank;

        # NOTE(review): esummary gene coordinates are reportedly 0-based while
        # efetch seq_start/seq_stop are 1-based - confirm whether a +1 shift is
        # wanted here. The original offsets are kept unchanged.
        $fetcher->set_parameters(
            -id        => $item_data{ChrAccVer},
            -seq_start => $seq_start,
            -seq_stop  => $seq_stop,
            -strand    => $strand,
        );
        print $fetcher->get_Response->content;
    }