Skip to content

Instantly share code, notes, and snippets.

@karthik101
Forked from shadowfax92/flipkart_parser.py
Last active August 29, 2015 14:07

Revisions

  1. @shadowfax92 shadowfax92 renamed this gist Oct 6, 2014. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion gistfile1.py → flipkart_parser.py
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,6 @@
    #Author = Nikhil Venkat Sonti
    # Author = Nikhil Venkat Sonti
    # email = nikhilsv92@gmail.com
    # github ID = shadowfax92
    import sys
    from xml.dom.minidom import _get_StringIO
    from lxml import html
  2. @shadowfax92 shadowfax92 revised this gist Oct 6, 2014. No changes.
  3. @shadowfax92 shadowfax92 created this gist Oct 6, 2014.
    143 changes: 143 additions & 0 deletions gistfile1.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,143 @@
    #Author = Nikhil Venkat Sonti
    # email = nikhilsv92@gmail.com
    import sys
    from xml.dom.minidom import _get_StringIO
    from lxml import html
    import requests
    import os
    import re
    import time
    import datetime
    import csv
    import urllib2
    from StringIO import StringIO
    import multiprocessing as mp
    import socket
    import shutil
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart
    from email.mime.application import MIMEApplication
    import subprocess
    import time

    # Offers already reported during this run: maps the mail subject built for an
    # offer to the mail body that was sent for it. Checked before mailing so the
    # same offer is not mailed again on a later poll.
    new_item_dict = {}

    def play_alert():
        """Sound an audible alert by running the Mac ``say`` command in a shell."""
        alert_command = 'say "ring ring ring"'
        os.system(alert_command)

    def mail_me(subject, content, sender="X@gmail.com", recipient="Y@gmail.com",
                sendmail_cmd=("/usr/sbin/sendmail", "-t")):
        """Send a plain-text mail by piping a MIME message to a sendmail command.

        Args:
            subject: Value of the ``Subject`` header.
            content: Text used as the message body.
            sender: Value of the ``From`` header.
            recipient: Value of the ``To`` header.
            sendmail_cmd: Argument vector of the program that receives the
                serialized message on its standard input.

        Returns:
            True when the command exited with status 0, False otherwise.

        Raises:
            OSError: If the command cannot be started (for example the
                executable does not exist).
        """
        msg = MIMEMultipart('alternative')
        msg["From"] = sender
        msg["To"] = recipient
        msg["Subject"] = subject
        msg.attach(MIMEText(content))

        # A pipe opened without text mode carries bytes; as_string() already
        # returns bytes on Python 2 but text on Python 3, so encode only then.
        payload = msg.as_string()
        if not isinstance(payload, bytes):
            payload = payload.encode('utf-8')

        proc = subprocess.Popen(list(sendmail_cmd),
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        _, err = proc.communicate(payload)

        # Only claim success when the command actually reported success.
        if proc.returncode != 0:
            reason = err.decode('utf-8', 'replace').strip()
            print('Mail failed (exit %d): %s' % (proc.returncode, reason))
            return False

        print('Mailed = ' + content)
        return True

    # Page that is scraped for offer tiles.
    _FLIPKART_HOME = 'http://www.flipkart.com/'
    # Prefix joined with a tile's ``data-url`` attribute to form the shop link.
    _FLIPKART_BASE = 'http://www.flipkart.com'
    # Absolute XPath of the i-th (1-based) offer tile; tied to the page layout.
    _TILE_XPATH = '/html/body/div[1]/div[3]/div/div[9]/div/div[1]/div[%d]'
    # Number of tiles inspected on every poll.
    _TILE_COUNT = 7
    # Seconds to wait for the HTTP request before giving up on this poll.
    _REQUEST_TIMEOUT = 30
    # Product keywords that trigger an extra audible alert.
    _KEYWORD_RE = re.compile(
        r'(disk|hard|seagate|external|headphone|headset|phone)', re.IGNORECASE)


    def _first_text(tree, xpath):
        """Return the stripped first result of *xpath*, or '' when nothing matches."""
        matches = tree.xpath(xpath)
        if not matches:
            return ''
        return matches[0].strip(' \t\n\r')


    def _tile_link(tree, xpath):
        """Return the full shop link of the first node at *xpath*, or '' if absent."""
        nodes = tree.xpath(xpath)
        if not nodes:
            return ''
        data_url = nodes[0].get('data-url')
        if not data_url:
            return ''
        return _FLIPKART_BASE + data_url


    def _report_name(label, value):
        """Print a scraped name and sound the alert when it matches a keyword."""
        if not value:
            return
        print(label + ' = ' + value)
        if _KEYWORD_RE.search(value):
            play_alert()


    def _process_tile(tree, index):
        """Print one offer tile and mail/alert if it was not reported before."""
        tile = _TILE_XPATH % index
        print('')

        sold_out = _first_text(tree, tile + '/div/div[2]/text()')
        if sold_out:
            print('sold-out or not = ' + sold_out)

        name_1 = _first_text(tree, tile + '/a/div[4]/text()')
        _report_name('name-1', name_1)

        name_2 = _first_text(tree, tile + '/a/div[2]/div/text()')
        _report_name('name-2', name_2)

        offer = _first_text(tree, tile + '/a/div[1]/text()')
        if offer:
            print('offer percentage/price = ' + offer)

        link = _tile_link(tree, tile + '/a/div[5]/div[2]/div')
        if link:
            print('view/shop link = ' + link)

        # Every field above is freshly computed for this tile, so a missing field
        # is '' rather than an unbound name or a value left over from an earlier
        # tile. A tile without any name carries nothing worth mailing.
        if not (name_1 or name_2):
            return

        mail_subject = name_1 + ' ' + name_2
        if mail_subject in new_item_dict:
            return

        mail_content = '\n'.join([name_1, name_2, sold_out, offer, link]) + '\n'
        # Recorded before mailing, so a failed mail is not retried on later polls.
        new_item_dict[mail_subject] = mail_content
        mail_me(mail_subject, mail_content)
        play_alert()


    def get_content_from_url_and_store():
        """Scrape the Flipkart home page once and report offers not seen before.

        Downloads the home page, prints the details found for each of the first
        ``_TILE_COUNT`` offer tiles, sounds an alert for names matching the
        keyword pattern, and for every offer whose subject is not yet a key of
        ``new_item_dict`` stores it there, mails it via ``mail_me`` and sounds
        the alert.

        Best effort: a failure while handling one tile is printed and the next
        tile is processed; any other failure (for example the download) is
        printed and the function returns normally so the caller's polling loop
        keeps running.
        """
        try:
            # A timeout keeps a stalled connection from blocking polling forever.
            page = requests.get(_FLIPKART_HOME, timeout=_REQUEST_TIMEOUT)
            tree = html.parse(StringIO(page.text)).getroot()

            os.system('clear')
            print('\nRUNNING FLIPKART PARSER')

            for index in range(1, _TILE_COUNT + 1):
                try:
                    _process_tile(tree, index)
                except Exception as e:
                    print(str(e))
        except Exception as e:
            print('Something Went Wrong :(')
            print('Exception:  ' + str(e))



    def main(poll_interval=5):
        """Scrape the page in an endless loop.

        Args:
            poll_interval: Seconds to sleep after each scrape before the next one.

        This function does not return on its own.
        """
        while True:
            get_content_from_url_and_store()
            time.sleep(poll_interval)


    # Start polling only when the file is executed as a script, not on import.
    if '__main__' == __name__:
        main()