Last active
March 23, 2021 14:44
-
-
Save budidino/4a2f88b6e34d7f161885f0c760642650 to your computer and use it in GitHub Desktop.
get App Store reviews
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xmltodict | |
import requests | |
import datetime | |
import multiprocessing as mp | |
# Two-letter country codes mapped to upper-case country names.
#
# The keys are iterated by the script entry point and interpolated into the
# review-feed URL as the storefront segment; the values are only used as
# display names when a review is printed.
#
# Commented-out entries are skipped entirely. NOTE(review): presumably these
# are territories without an App Store storefront (the fetcher prints
# "NO APP STORE" on HTTP 400) -- confirm before re-enabling any of them.
COUNTRY_CODES = {
    "AF": "AFGHANISTAN",
    # "AX": "ÅLAND ISLANDS",
    "AL": "ALBANIA",
    "DZ": "ALGERIA",
    # "AS": "AMERICAN SAMOA",
    "AD": "ANDORRA",
    "AO": "ANGOLA",
    "AI": "ANGUILLA",
    # "AQ": "ANTARCTICA",
    "AG": "ANTIGUA AND BARBUDA",
    "AR": "ARGENTINA",
    "AM": "ARMENIA",
    # "AW": "ARUBA",
    "AU": "AUSTRALIA",
    "AT": "AUSTRIA",
    "AZ": "AZERBAIJAN",
    "BS": "BAHAMAS",
    "BH": "BAHRAIN",
    "BD": "BANGLADESH",
    "BB": "BARBADOS",
    "BY": "BELARUS",
    "BE": "BELGIUM",
    "BZ": "BELIZE",
    "BJ": "BENIN",
    "BM": "BERMUDA",
    "BT": "BHUTAN",
    "BO": "BOLIVIA, PLURINATIONAL STATE OF",
    # "BQ": "BONAIRE, SINT EUSTATIUS AND SABA",
    "BA": "BOSNIA AND HERZEGOVINA",
    "BW": "BOTSWANA",
    # "BV": "BOUVET ISLAND",
    "BR": "BRAZIL",
    # "IO": "BRITISH INDIAN OCEAN TERRITORY",
    "BN": "BRUNEI DARUSSALAM",
    "BG": "BULGARIA",
    "BF": "BURKINA FASO",
    # "BI": "BURUNDI",
    "KH": "CAMBODIA",
    "CM": "CAMEROON",
    "CA": "CANADA",
    "CV": "CAPE VERDE",
    "KY": "CAYMAN ISLANDS",
    "CF": "CENTRAL AFRICAN REPUBLIC",
    "TD": "CHAD",
    "CL": "CHILE",
    "CN": "CHINA",
    # "CX": "CHRISTMAS ISLAND",
    # "CC": "COCOS (KEELING) ISLANDS",
    "CO": "COLOMBIA",
    # "KM": "COMOROS",
    "CG": "CONGO",
    "CD": "CONGO, THE DEMOCRATIC REPUBLIC OF THE",
    # "CK": "COOK ISLANDS",
    "CR": "COSTA RICA",
    "CI": "CÔTE D'IVOIRE",
    "HR": "CROATIA",
    # "CU": "CUBA",
    # "CW": "CURAÇAO",
    "CY": "CYPRUS",
    "CZ": "CZECH REPUBLIC",
    "DK": "DENMARK",
    # "DJ": "DJIBOUTI",
    "DM": "DOMINICA",
    "DO": "DOMINICAN REPUBLIC",
    "EC": "ECUADOR",
    "EG": "EGYPT",
    "SV": "EL SALVADOR",
    # "GQ": "EQUATORIAL GUINEA",
    # "ER": "ERITREA",
    "EE": "ESTONIA",
    "ET": "ETHIOPIA",
    # "FK": "FALKLAND ISLANDS (MALVINAS)",
    # "FO": "FAROE ISLANDS",
    "FJ": "FIJI",
    "FI": "FINLAND",
    "FR": "FRANCE",
    # "GF": "FRENCH GUIANA",
    # "PF": "FRENCH POLYNESIA",
    # "TF": "FRENCH SOUTHERN TERRITORIES",
    "GA": "GABON",
    "GM": "GAMBIA",
    "GE": "GEORGIA",
    "DE": "GERMANY",
    "GH": "GHANA",
    # "GI": "GIBRALTAR",
    "GR": "GREECE",
    # "GL": "GREENLAND",
    "GD": "GRENADA",
    # "GP": "GUADELOUPE",
    # "GU": "GUAM",
    "GT": "GUATEMALA",
    # "GG": "GUERNSEY",
    "GN": "GUINEA",
    "GW": "GUINEA-BISSAU",
    "GY": "GUYANA",
    # "HT": "HAITI",
    # "HM": "HEARD ISLAND AND MCDONALD ISLANDS",
    # "VA": "HOLY SEE (VATICAN CITY STATE)",
    "HN": "HONDURAS",
    "HK": "HONG KONG",
    "HU": "HUNGARY",
    "IS": "ICELAND",
    "IN": "INDIA",
    "ID": "INDONESIA",
    # "IR": "IRAN, ISLAMIC REPUBLIC OF",
    "IQ": "IRAQ",
    "IE": "IRELAND",
    # "IM": "ISLE OF MAN",
    "IL": "ISRAEL",
    "IT": "ITALY",
    "JM": "JAMAICA",
    "JP": "JAPAN",
    # "JE": "JERSEY",
    "JO": "JORDAN",
    "KZ": "KAZAKHSTAN",
    "KE": "KENYA",
    # "KI": "KIRIBATI",
    # "KP": "KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF",
    "KR": "KOREA, REPUBLIC OF",
    "KW": "KUWAIT",
    "KG": "KYRGYZSTAN",
    "LA": "LAO PEOPLE'S DEMOCRATIC REPUBLIC",
    "LV": "LATVIA",
    "LB": "LEBANON",
    # "LS": "LESOTHO",
    "LR": "LIBERIA",
    "LY": "LIBYA",
    "LI": "LIECHTENSTEIN",
    "LT": "LITHUANIA",
    "LU": "LUXEMBOURG",
    "MO": "MACAO",
    "MK": "MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF",
    "MG": "MADAGASCAR",
    "MW": "MALAWI",
    "MY": "MALAYSIA",
    "MV": "MALDIVES",
    "ML": "MALI",
    "MT": "MALTA",
    # "MH": "MARSHALL ISLANDS",
    # "MQ": "MARTINIQUE",
    "MR": "MAURITANIA",
    "MU": "MAURITIUS",
    # "YT": "MAYOTTE",
    "MX": "MEXICO",
    "FM": "MICRONESIA, FEDERATED STATES OF",
    "MD": "MOLDOVA, REPUBLIC OF",
    "MC": "MONACO",
    "MN": "MONGOLIA",
    "ME": "MONTENEGRO",
    "MS": "MONTSERRAT",
    "MA": "MOROCCO",
    "MZ": "MOZAMBIQUE",
    "MM": "MYANMAR",
    "NA": "NAMIBIA",
    "NR": "NAURU",
    "NP": "NEPAL",
    "NL": "NETHERLANDS",
    # "NC": "NEW CALEDONIA",
    "NZ": "NEW ZEALAND",
    "NI": "NICARAGUA",
    "NE": "NIGER",
    "NG": "NIGERIA",
    # "NU": "NIUE",
    # "NF": "NORFOLK ISLAND",
    # "MP": "NORTHERN MARIANA ISLANDS",
    "NO": "NORWAY",
    "OM": "OMAN",
    "PK": "PAKISTAN",
    "PW": "PALAU",
    "PS": "PALESTINE, STATE OF",
    "PA": "PANAMA",
    "PG": "PAPUA NEW GUINEA",
    "PY": "PARAGUAY",
    "PE": "PERU",
    "PH": "PHILIPPINES",
    # "PN": "PITCAIRN",
    "PL": "POLAND",
    "PT": "PORTUGAL",
    # "PR": "PUERTO RICO",
    "QA": "QATAR",
    # "RE": "RÉUNION",
    "RO": "ROMANIA",
    "RU": "RUSSIAN FEDERATION",
    "RW": "RWANDA",
    # "BL": "SAINT BARTHÉLEMY",
    # "SH": "SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA",
    "KN": "SAINT KITTS AND NEVIS",
    "LC": "SAINT LUCIA",
    # "MF": "SAINT MARTIN (FRENCH PART)",
    # "PM": "SAINT PIERRE AND MIQUELON",
    "VC": "SAINT VINCENT AND THE GRENADINES",
    "WS": "SAMOA",
    # "SM": "SAN MARINO",
    "ST": "SAO TOME AND PRINCIPE",
    "SA": "SAUDI ARABIA",
    "SN": "SENEGAL",
    "RS": "SERBIA",
    "SC": "SEYCHELLES",
    "SL": "SIERRA LEONE",
    "SG": "SINGAPORE",
    # "SX": "SINT MAARTEN (DUTCH PART)",
    "SK": "SLOVAKIA",
    "SI": "SLOVENIA",
    "SB": "SOLOMON ISLANDS",
    # "SO": "SOMALIA",
    "ZA": "SOUTH AFRICA",
    # "GS": "SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS",
    # "SS": "SOUTH SUDAN",
    "ES": "SPAIN",
    "LK": "SRI LANKA",
    # "SD": "SUDAN",
    "SR": "SURINAME",
    # "SJ": "SVALBARD AND JAN MAYEN",
    "SZ": "SWAZILAND",
    "SE": "SWEDEN",
    "CH": "SWITZERLAND",
    # "SY": "SYRIAN ARAB REPUBLIC",
    "TW": "TAIWAN, PROVINCE OF CHINA",
    "TJ": "TAJIKISTAN",
    "TZ": "TANZANIA, UNITED REPUBLIC OF",
    "TH": "THAILAND",
    # "TL": "TIMOR-LESTE",
    # "TG": "TOGO",
    # "TK": "TOKELAU",
    "TO": "TONGA",
    "TT": "TRINIDAD AND TOBAGO",
    "TN": "TUNISIA",
    "TR": "TURKEY",
    "TM": "TURKMENISTAN",
    "TC": "TURKS AND CAICOS ISLANDS",
    # "TV": "TUVALU",
    "UG": "UGANDA",
    "UA": "UKRAINE",
    "AE": "UNITED ARAB EMIRATES",
    "GB": "UNITED KINGDOM",
    "US": "UNITED STATES",
    # "UM": "UNITED STATES MINOR OUTLYING ISLANDS",
    "UY": "URUGUAY",
    "UZ": "UZBEKISTAN",
    "VU": "VANUATU",
    "VE": "VENEZUELA, BOLIVARIAN REPUBLIC OF",
    "VN": "VIET NAM",
    "VG": "VIRGIN ISLANDS, BRITISH",
    # "VI": "VIRGIN ISLANDS, U.S.",
    # "WF": "WALLIS AND FUTUNA",
    # "EH": "WESTERN SAHARA",
    "YE": "YEMEN",
    "ZM": "ZAMBIA",
    "ZW": "ZIMBABWE",
}
class Review:
    """One customer review taken from the App Store review feed.

    The constructor stores its arguments unchanged; no validation or type
    conversion happens here.
    """

    def __init__(self, updated, country, title, review, rating, version, author):
        """Store the fields of a single review.

        Args:
            updated: When the review was last updated.
            country: Country code of the storefront the review came from.
            title: Headline of the review.
            review: Body text of the review.
            rating: Star rating exactly as supplied by the caller.
            version: App version the review refers to.
            author: Name of the reviewer.
        """
        self.updated = updated
        self.country = country
        self.title = title
        self.review = review
        self.rating = rating
        self.version = version
        self.author = author

    def __repr__(self):
        """Return a constructor-style string listing every field."""
        return (
            f"{type(self).__name__}("
            f"updated={self.updated!r}, country={self.country!r}, "
            f"title={self.title!r}, review={self.review!r}, "
            f"rating={self.rating!r}, version={self.version!r}, "
            f"author={self.author!r})"
        )
def review_from_entry(entry, country) -> Review:
    """Build a ``Review`` from one parsed ``<entry>`` element of the feed.

    Args:
        entry: Mapping for a single feed entry as produced by
            ``xmltodict.parse``. It must contain the keys ``updated``,
            ``title``, ``content``, ``im:rating``, ``im:version`` and
            ``author`` (itself a mapping with a ``name`` key).
        country: Country code recorded on the resulting review.

    Returns:
        A ``Review`` whose ``updated`` field is a ``datetime.datetime``;
        the other fields are taken from the entry unchanged.

    Raises:
        KeyError: If a required key is missing from ``entry``.
        ValueError: If ``entry["updated"]`` is not an ISO-8601 timestamp.
    """
    stamp = entry["updated"]
    # ``fromisoformat`` only accepts a trailing "Z" from Python 3.11 on;
    # normalise it to an explicit UTC offset so older interpreters work too.
    if stamp.endswith("Z"):
        stamp = stamp[:-1] + "+00:00"
    updated = datetime.datetime.fromisoformat(stamp)

    content = entry["content"]
    # xmltodict returns a list when <content> is repeated, but a single
    # dict (or bare string) when it appears once. Keep the first variant,
    # as the list case always did. NOTE(review): presumably the first
    # variant is the plain-text one -- confirm against a live feed.
    if isinstance(content, list):
        content = content[0]
    if isinstance(content, dict):
        # An element with attributes but no text has no "#text" key.
        review = content.get("#text", "")
    else:
        review = content or ""

    return Review(
        updated,
        country,
        entry["title"],
        review,
        entry["im:rating"],
        entry["im:version"],
        entry["author"]["name"],
    )
def reviews_for_country_code(app_id, country_code, timeout=30) -> "list[Review]":
    """Fetch and parse the customer-review feed of one storefront.

    Args:
        app_id: App identifier interpolated into the feed URL.
        country_code: Storefront code interpolated into the feed URL and
            recorded on every returned review.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of ``Review`` objects. The list is empty when the request
        fails, the server answers with a non-success status, or the feed
        contains no review entries.

    Note:
        Errors raised while parsing the response body (by
        ``xmltodict.parse`` or ``review_from_entry``) are not caught here.
    """
    url = f"https://itunes.apple.com/{country_code}/rss/customerreviews/id={app_id}/xml"
    headers = {'User-Agent': 'Mozilla/5.0', 'Cache-Control': 'no-cache'}

    try:
        # Without a timeout a stalled connection would block this worker forever.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as error:
        print(f"REQUEST FAILED: {country_code} ({error})")
        return []

    if response.status_code == 400:
        print(f"NO APP STORE: {country_code}")
        return []
    if not response.ok:
        # Any other error body is not a review feed; do not try to parse it.
        print(f"HTTP {response.status_code}: {country_code}")
        return []

    data = xmltodict.parse(response.content)
    feed = data.get("feed") or {}
    entries = feed.get("entry")

    if isinstance(entries, list):  # several reviews
        return [review_from_entry(entry, country_code) for entry in entries]
    if isinstance(entries, dict) and "title" in entries:  # exactly one review
        return [review_from_entry(entries, country_code)]
    return []  # feed without review entries
def get_reviews(app_id, country_codes):
    """Fetch reviews for every given country in a pool of worker processes.

    One ``reviews_for_country_code`` task is scheduled per country code.
    Nothing is returned: each finished task's result is handed to
    ``log_reviews``. The call blocks until all tasks have completed.

    Args:
        app_id: App identifier forwarded to ``reviews_for_country_code``.
        country_codes: Iterable of country codes to query.
    """
    # Pool.__exit__ calls terminate(), so the workers are released even if
    # scheduling raises part-way through.
    with mp.Pool() as pool:
        for country_code in country_codes:
            pool.apply_async(
                reviews_for_country_code,
                args=(app_id, country_code),
                callback=log_reviews,
                # Without an error callback a failing task is dropped
                # silently. The default argument binds the current code;
                # a plain closure would only see the last loop value.
                error_callback=lambda error, code=country_code: print(
                    f"FAILED: {code} ({error!r})"
                ),
            )
        # close() + join() must run inside the with-block, before
        # terminate() would kill tasks that are still in flight.
        pool.close()
        pool.join()
# Every review collected so far, across all countries.
review_list = []


def log_reviews(reviews):
    """Add a batch of reviews to the module-level ``review_list``.

    Used as the ``callback`` of ``apply_async`` in ``get_reviews``, so it
    receives the list returned by one ``reviews_for_country_code`` task.

    Args:
        reviews: Iterable of reviews to append.
    """
    # Extending in place mutates the shared list, so no ``global`` is needed.
    review_list.extend(reviews)
# Identifier of the app whose reviews are fetched; it is interpolated into
# the feed URL as ``id=<app_id>``.
app_id = "428395953"

if __name__ == '__main__':
    # Collect reviews from every enabled storefront into ``review_list``.
    get_reviews(app_id, COUNTRY_CODES)

    # Newest reviews first.
    sorted_reviews = sorted(review_list, key=lambda review: review.updated, reverse=True)
    for review in sorted_reviews:
        print(
            f"\ndate: {review.updated}"
            f"\ncountry: {COUNTRY_CODES[review.country]}"
            f"\nversion: {review.version}"
            f"\nrating: {review.rating}"
            f"\nauthor: {review.author}"
            f"\ntitle: {review.title}"
            f"\nreview:\n{review.review}\n"
        )

    print(f"REVIEWS: {len(review_list)}")
    if review_list:
        # Ratings are converted with int() before averaging.
        review_sum = sum(int(review.rating) for review in review_list)
        print(f"RATING: {round(review_sum / len(review_list), 2)}")
    else:
        # With no reviews the average is undefined; dividing would raise
        # ZeroDivisionError.
        print("RATING: n/a")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Added multiprocessing (reviews for each country are fetched in parallel worker processes) in the second revision.