Skip to content

Instantly share code, notes, and snippets.

@claraj
Last active March 31, 2021 13:47

Revisions

  1. claraj revised this gist Mar 31, 2021. No changes.
  2. claraj revised this gist Mar 31, 2021. 2 changed files with 38 additions and 0 deletions.
    28 changes: 28 additions & 0 deletions dna_string_start.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,28 @@
    # How many A, C, G, T characters are in this string?
    #
    # if
    #
    # dna = 'CGATC'
    #
    # We could count easily, there are
    # 1 A
    # 2 C
    # 1 G
    # 1 T
    #
    # What about a longer string?
    #
    # Create a much longer example string. How would you count the number of each character?

    dna = 'ACGACGGATACGCGGGAGCTATTCATCTGTGTTGAGAAACACCGGAGAACTTATTGGTCTGTCAAGATTGCGACTGTGGTATAGCTCACCCGGTCGCGGCTTTCTAGT' \
    'TAGTGGCCAGCTCCCGTGTATTTGGAAGCTGAGAGAAGGACCCCTGTGGTTCGAATCAGCTCACGAGCGCTGGCACACCGCAATCAGCCGGCTAATAAAATTCGTACG' \
    'GACTGCCCCACACAAGAAGACGGTAAATTTATCAACACTATAGTTGCTATACACCAGGAGCGAGCGTAAATTTGTAGCGGTCAGATTAACTTGCTGGGAACGAACCAT' \
    'TGTCGCCCTCTGCAGCAAGTTAGTTGGCATCATTGGTACTGCCCTTCACTGGTAGCAGCTCCCCCTGTAATATATCCGTGGCCACTATTCAAGGGCTCAAATAGGCGA' \
    'CCCAGAGACCATTATAGGCGGTACAGCGCTGGTAGGTTTGCCTGGGCAGATATCGTTAGCCCCTTCTGCGCGCTATAAGATAGCGAAGGATAATTCTGCGGGACCA' \
    'TGGTCGTCTCCTAACCTCAGGGTGGGATTCCTGGCAGGTGGACCGGGCGCGCATCGAGAGCATTCGGGGTTCCTACCAGCCAGGGAAATCGGGTCGACCACTAGGCAA' \
    'TGAGCGGCTCACACCGATTTTCTTAAGAGACGTAACAAAGCCCGCATTAACGGCTGGAGTGAATCACCGTACGACTACCTAAGCCTCATTGGGATCCACTGTAAACCC' \
    'CTTCGCCGGTGTTGGGTGTCCGCAACGCCTCTGCTTTTTGCGTACAGTCGGCGTGGTGGAGTCCGCGGCCATACTGGCGGTTGGTTTGTAGAACAGTGTAACGACGTG' \
    'TGTCACTGCCCCCCGTAGCTTCTATTGCCCTGTTTGGGAGGTTCTATAGGGGTTACAGAGTAGTTTTAAGTTTTAGCACGACAGCACCAGTATTGCCAGTGACGCCGT' \
    'TGAGGCCGCAAAAGTGATTAACCCCCGTGGGACCGGATACGTTCCCAGCGGCAATCCTTGTCTTACCGCCGGACTGCGGAGCGAAGGGAGAAGTAACCGTGGTAATTA'

    print(dna) # All on one line
    10 changes: 10 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,10 @@
    dna = """ACGACGGATACGCGGGAGCTATTCATCTGTGTTGAGAAACACCGGAGAACTTATTGGTCTGTCAAGATTGCGACTGTGGTATAGCTCACCCGGTCGCGGCTTTCTAGT' \
    'TAGTGGCCAGCTCCCGTGTATTTGGAAGCTGAGAGAAGGACCCCTGTGGTTCGAATCAGCTCACGAGCGCTGGCACACCGCAATCAGCCGGCTAATAAAATTCGTACG' \
    'GACTGCCCCACACAAGAAGACGGTAAATTTATCAACACTATAGTTGCTATACACCAGGAGCGAGCGTAAATTTGTAGCGGTCAGATTAACTTGCTGGGAACGAACCAT' \
    'TGTCGCCCTCTGCAGCAAGTTAGTTGGCATCATTGGTACTGCCCTTCACTGGTAGCAGCTCCCCCTGTAATATATCCGTGGCCACTATTCAAGGGCTCAAATAGGCGA' \
    'CCCAGAGACCATTATAGGCGGTACAGCGCTGGTAGGTTTGCCTGGGCAGATATCGTTAGCCCCTTCTGCGCGCTATAAGATAGCGAAGGATAATTCTGCGGGACCA' \
    'TGGTCGTCTCCTAACCTCAGGGTGGGATTCCTGGCAGGTGGACCGGGCGCGCATCGAGAGCATTCGGGGTTCCTACCAGCCAGGGAAATCGGGTCGACCACTAGGCAA' \
    'TGAGCGGCTCACACCGATTTTCTTAAGAGACGTAACAAAGCCCGCATTAACGGCTGGAGTGAATCACCGTACGACTACCTAAGCCTCATTGGGATCCACTGTAAACCC' \
    'CTTCGCCGGTGTTGGGTGTCCGCAACGCCTCTGCTTTTTGCGTACAGTCGGCGTGGTGGAGTCCGCGGCCATACTGGCGGTTGGTTTGTAGAACAGTGTAACGACGTG' \
    'TGTCACTGCCCCCCGTAGCTTCTATTGCCCTGTTTGGGAGGTTCTATAGGGGTTACAGAGTAGTTTTAAGTTTTAGCACGACAGCACCAGTATTGCCAGTGACGCCGT' \
    'TGAGGCCGCAAAAGTGATTAACCCCCGTGGGACCGGATACGTTCCCAGCGGCAATCCTTGTCTTACCGCCGGACTGCGGAGCGAAGGGAGAAGTAACCGTGGTAATTA'
  3. claraj revised this gist Oct 28, 2020. 1 changed file with 8 additions and 0 deletions.
    8 changes: 8 additions & 0 deletions links and notes.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,8 @@
    Sources:

    DNA structure & image; https://www.nature.com/scitable/topicpage/discovery-of-dna-structure-and-function-watson-397/
    Generate random DNA sequences http://bioinformatics.org/sms2/random_dna.html
    https://en.wikipedia.org/wiki/Start_codon
    General DNA info https://www.coursera.org/learn/dna-decoded
    Learning Bioinformatics - solving biology problems with programming: https://www.coursera.org/learn/bioinformatics
    Python bioinformatics problems: http://rosalind.info
  4. claraj revised this gist Oct 28, 2020. 1 changed file with 75 additions and 0 deletions.
    75 changes: 75 additions & 0 deletions base_counts.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,75 @@
    dna3 = 'ACGACGGATACGCGGGAGCTATTCATCTGTGTTGAGAAACACCGGAGAACTTATTGGTCTGTCAAGATTGCGACTGTGGTATAGCTCACCCGGTCGCGGCTTTCTAGT' \
    'TAGTGGCCAGCTCCCGTGTATTTGGAAGCTGAGAGAAGGACCCCTGTGGTTCGAATCAGCTCACGAGCGCTGGCACACCGCAATCAGCCGGCTAATAAAATTCGTACG' \
    'GACTGCCCCACACAAGAAGACGGTAAATTTATCAACACTATAGTTGCTATACACCAGGAGCGAGCGTAAATTTGTAGCGGTCAGATTAACTTGCTGGGAACGAACCAT' \
    'TGTCGCCCTCTGCAGCAAGTTAGTTGGCATCATTGGTACTGCCCTTCACTGGTAGCAGCTCCCCCTGTAATATATCCGTGGCCACTATTCAAGGGCTCAAATAGGCGA' \
    'CCCAGAGACCATTATAGGCGGTACAGCGCTGGTAGGTTTGCCTGGGCAGATATCGTTAGCCCCTTCTGCGCGCTATAAGATAGCGAAGGATAATTCTGCGGGACCA' \
    'TGGTCGTCTCCTAACCTCAGGGTGGGATTCCTGGCAGGTGGACCGGGCGCGCATCGAGAGCATTCGGGGTTCCTACCAGCCAGGGAAATCGGGTCGACCACTAGGCAA' \
    'TGAGCGGCTCACACCGATTTTCTTAAGAGACGTAACAAAGCCCGCATTAACGGCTGGAGTGAATCACCGTACGACTACCTAAGCCTCATTGGGATCCACTGTAAACCC' \
    'CTTCGCCGGTGTTGGGTGTCCGCAACGCCTCTGCTTTTTGCGTACAGTCGGCGTGGTGGAGTCCGCGGCCATACTGGCGGTTGGTTTGTAGAACAGTGTAACGACGTG' \
    'TGTCACTGCCCCCCGTAGCTTCTATTGCCCTGTTTGGGAGGTTCTATAGGGGTTACAGAGTAGTTTTAAGTTTTAGCACGACAGCACCAGTATTGCCAGTGACGCCGT' \
    'TGAGGCCGCAAAAGTGATTAACCCCCGTGGGACCGGATACGTTCCCAGCGGCAATCCTTGTCTTACCGCCGGACTGCGGAGCGAAGGGAGAAGTAACCGTGGTAATTA'

    dna4 = 'CAGAGCAATGTCTGTTAGATAATCTCTCGTCTGGATAGCGAGAAGTTTCCGGAAGACGATTGTTTCCAACGAAAGGGCTGATAACTACACTCTGTCGCGCTTCTTTCG' \
    'TGTTCGCCAAGGGCACATTGGTTTAAAAGTGATCTCGAGAGACGTTTTCCTGACTTGTTGTGTTATATCAACGTAACTTTTAAGTCATATTTTCTCCCTACCCCAGAC' \
    'TAGACGGGTTCCTTTCATCGTCCACCGAGTTGCTTACGAGCAUGACACTTAGCCGGGGAAAAGTTCGCAATTCCGCGACAGCGTCAGGTGTCAAACAGATCCAAGCGA' \
    'AGGCCGCCGTGTAACGGAGAATTGTGGGCGCAGTCAAATAGCTAATTATTGGGAAAGGCCAAGTGGAGTCCGTCAGCGGAACAGCCTGGGCGGACGCGCTGCCGCTCG' \
    'TTCACCTCGCCTGCCTTCGTGTTGGGGACCGGATACGTTCCCAGCGGCAATCCTTGTCTTACCGCCGGACTGCGGAGCGAAGGGAGAAGTAACCGTGGTAATTAGCGA' \
    'GAGACCGTTGAGGCGCGGGGCGATCCGCCCTTGAGTGGACTCCAAACACATTCGACGAAGGGGTGGGAACATAAGTTAATTGGAGGGTCGGGGAAGTCCCACGCCCGG' \
    'TCCCTACATGATTGCACATAGTTCGTTCACCAACGGGCGATCTTCCTCACACTAGAGGAACGAGTAGTACTCCAGACATTGAGTCAGTTGCAGACCAAGTGGAGGGAA' \
    'CGATTTTTAUGGGCCGCTCAGGTACTAGTGCTAGACCCTACAAACGGCACTGGTGACCCGCTCCCGAGTTTGCGCTGTTACGTGTCCCTTAAAGTATACTTCGATCCT' \
    'AACATCGCGGCCATACGACGCTTAAATATTTCACCAGTTGTGTTTCGCGCAUGGAGTTGTTCTGTGTTATCGGCGAGTCTCCATTGCACGTCATCAACTAAAAACCAC' \
    'GGCCACACAGACATGCCTTGATTCTTCCCGCGACGGTAGGTTTGCCTGGGCAGATATCGTTAGCCCCTTCTGCGCGCTATAAGATAGCGATAGTAGGTTTAACTATCA'

    print(len(dna3))

    # how many A, C, G, and T are in dna3?
    # what about dna4?
    # can you write a function?
    # and thinking more generally - what if you don't know what letters there are?
    #

    # Approach 1: keep one counter variable per base and tally dna3 by hand.
    a_count = 0
    c_count = 0
    g_count = 0
    t_count = 0

    for base in dna3:
        # Each base is a single letter, so at most one branch can match;
        # elif stops testing as soon as a match is found.
        if base == 'A':
            a_count += 1
        elif base == 'C':
            c_count += 1
        elif base == 'G':
            g_count += 1
        elif base == 'T':
            t_count += 1

    # Counts are printed in A, C, G, T order.
    print(a_count, c_count, g_count, t_count)

    # Approach 2: keep the counts in a dictionary keyed by base letter.
    # how could you generate this from the text?
    base_counts = {'A': 0, 'C': 0, 'G': 0, 'T': 0}

    # work with dictionary keys and values
    for base in dna3:
        # Same explicit branching as the separate-variable version, but each
        # branch now updates the matching dictionary entry.
        if base == 'A':
            base_counts['A'] += 1
        elif base == 'C':
            base_counts['C'] += 1
        elif base == 'G':
            base_counts['G'] += 1
        elif base == 'T':
            base_counts['T'] += 1

    print(base_counts)

    # Approach 3: start again from zeroed counts and let the base itself be the key.
    base_counts = {'A': 0, 'C': 0, 'G': 0, 'T': 0}

    # but now we have a dictionary, we can make this much more general
    for base in dna3:
        # .get() starts a letter that is not already a key at 0, so an
        # unexpected character (dna4 contains 'U') is counted instead of
        # raising KeyError.
        base_counts[base] = base_counts.get(base, 0) + 1

    print(base_counts)

    # Question: how could you generate this initial dictionary from the text?
    # Question: can you put the counting code into a function?
  5. claraj revised this gist Oct 28, 2020. 1 changed file with 24 additions and 0 deletions.
    24 changes: 24 additions & 0 deletions reverse_dna.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,24 @@
    def other_strand(dna):
        """Return the complementary strand for ``dna``.

        Each base is replaced by its pair: A <-> T and C <-> G. The result
        keeps the same left-to-right order as the input (it is not reversed).

        Only the uppercase letters 'A', 'T', 'C' and 'G' are paired; any other
        character is left out of the result, so the returned string can be
        shorter than ``dna``. An empty input gives an empty string.
        """
        # Its pair: A pairs with T, and C pairs with G (in both directions).
        pairs = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
        # Build the strand with a single join rather than growing a string
        # one base at a time.
        return ''.join(pairs[base] for base in dna if base in pairs)


    # Two short example strands to run through other_strand().
    dna1 = 'ACCAGTACCAGTGT' # how can you make the other side?
    # TGGTCATGGTCACA # expected output for dna1

    # A pairs with T and T pairs with A
    # C pairs with G and G pairs with C
    dna2 = 'GTACACCAGGTCTA'
    # CATGTGGTCCAGAT # expected output for dna2

    # Print the complementary strand of each example.
    print(other_strand(dna1))
    print(other_strand(dna2))
  6. claraj revised this gist Oct 28, 2020. 1 changed file with 34 additions and 30 deletions.
    64 changes: 34 additions & 30 deletions dna.py
    Original file line number Diff line number Diff line change
    @@ -9,46 +9,50 @@
    #


    dna1 = 'accagtaccagtgt'
    dna2 = 'gtacaccaggtcta'
    dna1 = 'ACCAGTACCAGTGT'
    dna2 = 'GTACACCAGGTCTA'

    # Remember
    # a pairs with t
    # c pairs with g
    # Remember
    # A pairs with T and T pairs with A
    # C pairs with G and G pairs with C

    # so for dna1, it begins acca... so output string will start tggt...
    # so for dna1, it begins ACCA... so output string will start TGGT...

    # In DNA, the ATCG are codes for generating proteins (you are made of 100's of different kinds of proteins).
    # In DNA, the A, T, C, G are codes for generating proteins
    # you are made of 100's of different kinds of proteins.
    # Most of your DNA doesn't appear to make proteins - only about 1% of it encodes protein.
    # A part of DNA that encodes a protein is called a gene. So how do you find which parts do,
    # or where are your genes in your DNA?
    #
    # So biologist are interested in where certain codes are. One code is
    # ATG is called a 'start codon' and that means 'start making a protein here'
    # So biologists are interested in where certain codes are. One code is ATG and this
    # is called a 'start codon' and that means 'start making a protein here'
    #
    # Does string 1 have any genes in it?
    # Does string 2 have any genes in it?
    #
    # If so, what's the index of where that gene is?

    dna3 = 'acgatggatacgcgggagctattcatctgtgttgagaaacaccggagaacttattggtctgtcaagattgcgactgtggtatagctcacccggtcgcggctttctagt' \
    'tagtggccagctcccgtgtatttggaagctgagagaaggacccctgtggttcgaatcagctcacgagcgctggcacaccgcaatcagccggctaataaaattcgtatg' \
    'gactgccccacacaagaagacggtaaatttatcaacactatagttgctatacaccaggagcgagcgtaaatttgtagcggtcagattaacttgctgggaatgaaccat' \
    'tgtcgccctctgcagcaagttagatggcatgattggtactgcccttcactggtagcagctccccctgtaatatatccgtggccactattcaagggctcaaataggcga' \
    'ccatgagagaccattataggcggtacagcgatggtaggtttgcctgggcagatatcgttagccccttctgcgcgctataagatagcgaaggataattctgcgggacca' \
    'tggtcgtctcctaacctcagggtgggattcctggcaggtggaccgggcgcgcatcgagagcattcggggttcctaccagccagggaaatcgggtcgaccactaggcaa' \
    'tgagcggctcacaccgattttcttaagagacgtaacaaagcccgcatgaacggctggagtgaatcaccgtacgactacctaagcctcattgggatccactgtaaaccc' \
    'cttcgccggtgttgggtgtccgcaacgcctctgctttttgcgtacagtcggcgtggtggagtccgcggccatactggcggatggtttgtagaacagtgtaacgatgtg' \
    'tgtcactgccccccgtagcttctattgccatgtttgggaggttctataggggttacagagtagttttaagttttagcacgacagcaccagtattgccagtgatgccgt' \
    'tgaggccgcaaaagtgattaacccccgtgggaccggatacgttcccagcggcaatccttgtcttaccgccggactgcggagcgaagggagaagtaaccgtggtaatta'

    dna4 = 'cagagcaatgtctgttagataatctctcgtctggatagcgagaagtttccggaagacgattgtttccaacgaaagggctgataactacactctgtcgcgcttctttcg' \
    'tgttcgccatgggcacattggtttaaaagtgatctcgagagacgttttcatgacttgttgtgttatatcaacgtaacttttaagtcatattttctccctaccccagac' \
    'tagatgggttcctttcatcgtccaccgagttgcttacgagcaugacacttagccggggaaaatgttcgcaatgttccgcgacagcgtcaggtgtcaaacagaaagcga' \
    'aggccgccgtgtaacggagaattgtgggcgcagtcaaatagctaattattgggaaaggccatgtggagtccgtcagcggaacagcctgggcggacgcgctgccgctcg' \
    'ttcacctcgcctgccttcgtgttggggaccggatacgttcccagcggcaatccttgtcttaccgccggactgcggagcgaagggagaagtaaccgtggtaattagcga' \
    'gagaccgttgaggcgcggggcgatccgcccttgagtggactccaaacacattcgacgaaggggtgggaacataagttaattggagggtcggggaagtcccacgcccgg' \
    'tccctacatgattgcacatagttcgttcaccaacgggcgatcttcctcacactagaggaacgagtagtactccagacattgagtcagttgcagaccaagtggagggaa' \
    'cgatttttaugggccgctcaggtactagtgctagaatgcctacaaacggcactggtgacccgctcccgagtttgcgctgttacgtgtcccttaaagtatacttcgatc' \
    'aacatggcggccatacgacgcttaaatatttcaccagttgtgtttcgcgcauggagttgttctgtgttatcggcgagtctccattgcacgtcatcaactaaaaaccac' \
    'ggccacacagacatgccttgattcttcccgcgatggtaggtttgcctgggcagatatcgttagccccttctgcgcgctataagatagcgatggtaggtttaactatca'
    dna3 = 'ACGACGGATACGCGGGAGCTATTCATCTGTGTTGAGAAACACCGGAGAACTTATTGGTCTGTCAAGATTGCGACTGTGGTATAGCTCACCCGGTCGCGGCTTTCTAGT' \
    'TAGTGGCCAGCTCCCGTGTATTTGGAAGCTGAGAGAAGGACCCCTGTGGTTCGAATCAGCTCACGAGCGCTGGCACACCGCAATCAGCCGGCTAATAAAATTCGTACG' \
    'GACTGCCCCACACAAGAAGACGGTAAATTTATCAACACTATAGTTGCTATACACCAGGAGCGAGCGTAAATTTGTAGCGGTCAGATTAACTTGCTGGGAACGAACCAT' \
    'TGTCGCCCTCTGCAGCAAGTTAGTTGGCATCATTGGTACTGCCCTTCACTGGTAGCAGCTCCCCCTGTAATATATCCGTGGCCACTATTCAAGGGCTCAAATAGGCGA' \
    'CCCAGAGACCATTATAGGCGGTACAGCGCTGGTAGGTTTGCCTGGGCAGATATCGTTAGCCCCTTCTGCGCGCTATAAGATAGCGAAGGATAATTCTGCGGGACCA' \
    'TGGTCGTCTCCTAACCTCAGGGTGGGATTCCTGGCAGGTGGACCGGGCGCGCATCGAGAGCATTCGGGGTTCCTACCAGCCAGGGAAATCGGGTCGACCACTAGGCAA' \
    'TGAGCGGCTCACACCGATTTTCTTAAGAGACGTAACAAAGCCCGCATTAACGGCTGGAGTGAATCACCGTACGACTACCTAAGCCTCATTGGGATCCACTGTAAACCC' \
    'CTTCGCCGGTGTTGGGTGTCCGCAACGCCTCTGCTTTTTGCGTACAGTCGGCGTGGTGGAGTCCGCGGCCATACTGGCGGTTGGTTTGTAGAACAGTGTAACGACGTG' \
    'TGTCACTGCCCCCCGTAGCTTCTATTGCCCTGTTTGGGAGGTTCTATAGGGGTTACAGAGTAGTTTTAAGTTTTAGCACGACAGCACCAGTATTGCCAGTGACGCCGT' \
    'TGAGGCCGCAAAAGTGATTAACCCCCGTGGGACCGGATACGTTCCCAGCGGCAATCCTTGTCTTACCGCCGGACTGCGGAGCGAAGGGAGAAGTAACCGTGGTAATTA'

    dna4 = 'CAGAGCAATGTCTGTTAGATAATCTCTCGTCTGGATAGCGAGAAGTTTCCGGAAGACGATTGTTTCCAACGAAAGGGCTGATAACTACACTCTGTCGCGCTTCTTTCG' \
    'TGTTCGCCAAGGGCACATTGGTTTAAAAGTGATCTCGAGAGACGTTTTCCTGACTTGTTGTGTTATATCAACGTAACTTTTAAGTCATATTTTCTCCCTACCCCAGAC' \
    'TAGACGGGTTCCTTTCATCGTCCACCGAGTTGCTTACGAGCAUGACACTTAGCCGGGGAAAAGTTCGCAATTCCGCGACAGCGTCAGGTGTCAAACAGATCCAAGCGA' \
    'AGGCCGCCGTGTAACGGAGAATTGTGGGCGCAGTCAAATAGCTAATTATTGGGAAAGGCCAAGTGGAGTCCGTCAGCGGAACAGCCTGGGCGGACGCGCTGCCGCTCG' \
    'TTCACCTCGCCTGCCTTCGTGTTGGGGACCGGATACGTTCCCAGCGGCAATCCTTGTCTTACCGCCGGACTGCGGAGCGAAGGGAGAAGTAACCGTGGTAATTAGCGA' \
    'GAGACCGTTGAGGCGCGGGGCGATCCGCCCTTGAGTGGACTCCAAACACATTCGACGAAGGGGTGGGAACATAAGTTAATTGGAGGGTCGGGGAAGTCCCACGCCCGG' \
    'TCCCTACATGATTGCACATAGTTCGTTCACCAACGGGCGATCTTCCTCACACTAGAGGAACGAGTAGTACTCCAGACATTGAGTCAGTTGCAGACCAAGTGGAGGGAA' \
    'CGATTTTTAUGGGCCGCTCAGGTACTAGTGCTAGACCCTACAAACGGCACTGGTGACCCGCTCCCGAGTTTGCGCTGTTACGTGTCCCTTAAAGTATACTTCGATCCT' \
    'AACATCGCGGCCATACGACGCTTAAATATTTCACCAGTTGTGTTTCGCGCAUGGAGTTGTTCTGTGTTATCGGCGAGTCTCCATTGCACGTCATCAACTAAAAACCAC' \
    'GGCCACACAGACATGCCTTGATTCTTCCCGCGACGGTAGGTTTGCCTGGGCAGATATCGTTAGCCCCTTCTGCGCGCTATAAGATAGCGATAGTAGGTTTAACTATCA'

    print(dna4.upper())
    # why use the \ line continuation here instead of a triple-quoted string?
  7. claraj created this gist Oct 28, 2020.
    54 changes: 54 additions & 0 deletions dna.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,54 @@
    # Example of real-world use of Python string manipulation - DNA analysis
    #
    # DNA is made of ATGC
    #
    # A-T are always paired
    # G-C are always paired
    #
    # So if you have a sequence of one side of a DNA molecule, can you use Python to generate the other side?
    #


    dna1 = 'accagtaccagtgt'
    dna2 = 'gtacaccaggtcta'

    # Remember
    # a pairs with t
    # c pairs with g

    # so for dna1, it begins acca... so output string will start tggt...

    # In DNA, the ATCG are codes for generating proteins (you are made of 100's of different kinds of proteins).
    # Most of your DNA doesn't appear to make proteins - only about 1% of it encodes protein.
    # A part of DNA that encodes a protein is called a gene. So how do you find which parts do,
    # or where are your genes in your DNA?
    #
    # So biologists are interested in where certain codes are. One code, ATG,
    # is called a 'start codon' and that means 'start making a protein here'
    #
    # Does string 1 have any genes in it?
    # Does string 2 have any genes in it?
    #
    # If so, what's the index of where that gene is?

    dna3 = 'acgatggatacgcgggagctattcatctgtgttgagaaacaccggagaacttattggtctgtcaagattgcgactgtggtatagctcacccggtcgcggctttctagt' \
    'tagtggccagctcccgtgtatttggaagctgagagaaggacccctgtggttcgaatcagctcacgagcgctggcacaccgcaatcagccggctaataaaattcgtatg' \
    'gactgccccacacaagaagacggtaaatttatcaacactatagttgctatacaccaggagcgagcgtaaatttgtagcggtcagattaacttgctgggaatgaaccat' \
    'tgtcgccctctgcagcaagttagatggcatgattggtactgcccttcactggtagcagctccccctgtaatatatccgtggccactattcaagggctcaaataggcga' \
    'ccatgagagaccattataggcggtacagcgatggtaggtttgcctgggcagatatcgttagccccttctgcgcgctataagatagcgaaggataattctgcgggacca' \
    'tggtcgtctcctaacctcagggtgggattcctggcaggtggaccgggcgcgcatcgagagcattcggggttcctaccagccagggaaatcgggtcgaccactaggcaa' \
    'tgagcggctcacaccgattttcttaagagacgtaacaaagcccgcatgaacggctggagtgaatcaccgtacgactacctaagcctcattgggatccactgtaaaccc' \
    'cttcgccggtgttgggtgtccgcaacgcctctgctttttgcgtacagtcggcgtggtggagtccgcggccatactggcggatggtttgtagaacagtgtaacgatgtg' \
    'tgtcactgccccccgtagcttctattgccatgtttgggaggttctataggggttacagagtagttttaagttttagcacgacagcaccagtattgccagtgatgccgt' \
    'tgaggccgcaaaagtgattaacccccgtgggaccggatacgttcccagcggcaatccttgtcttaccgccggactgcggagcgaagggagaagtaaccgtggtaatta'

    dna4 = 'cagagcaatgtctgttagataatctctcgtctggatagcgagaagtttccggaagacgattgtttccaacgaaagggctgataactacactctgtcgcgcttctttcg' \
    'tgttcgccatgggcacattggtttaaaagtgatctcgagagacgttttcatgacttgttgtgttatatcaacgtaacttttaagtcatattttctccctaccccagac' \
    'tagatgggttcctttcatcgtccaccgagttgcttacgagcaugacacttagccggggaaaatgttcgcaatgttccgcgacagcgtcaggtgtcaaacagaaagcga' \
    'aggccgccgtgtaacggagaattgtgggcgcagtcaaatagctaattattgggaaaggccatgtggagtccgtcagcggaacagcctgggcggacgcgctgccgctcg' \
    'ttcacctcgcctgccttcgtgttggggaccggatacgttcccagcggcaatccttgtcttaccgccggactgcggagcgaagggagaagtaaccgtggtaattagcga' \
    'gagaccgttgaggcgcggggcgatccgcccttgagtggactccaaacacattcgacgaaggggtgggaacataagttaattggagggtcggggaagtcccacgcccgg' \
    'tccctacatgattgcacatagttcgttcaccaacgggcgatcttcctcacactagaggaacgagtagtactccagacattgagtcagttgcagaccaagtggagggaa' \
    'cgatttttaugggccgctcaggtactagtgctagaatgcctacaaacggcactggtgacccgctcccgagtttgcgctgttacgtgtcccttaaagtatacttcgatc' \
    'aacatggcggccatacgacgcttaaatatttcaccagttgtgtttcgcgcauggagttgttctgtgttatcggcgagtctccattgcacgtcatcaactaaaaaccac' \
    'ggccacacagacatgccttgattcttcccgcgatggtaggtttgcctgggcagatatcgttagccccttctgcgcgctataagatagcgatggtaggtttaactatca'