#!/usr/bin/env python3 # vim: shiftwidth=4 softtabstop=4 tabstop=4 # File: vooblystats.py # Author: bgstack15@gmail.com # Startdate: 2020-02-01 18:55 # Title: Script to Pull Game Data from Voobly # Purpose: Pull stats from Voobly for data visualiation for 30_Turbo_Swag # History: # Usage: # Reference: # ripped primarily from https://github.com/nathankong97/voobly-parse-aoe2-game-data # date conversion: # https://stackoverflow.com/questions/4615250/convert-relative-date-string-to-absolute-date/4615451#4615451 # https://github.com/bear/parsedatetime # https://stackoverflow.com/questions/4770297/convert-utc-datetime-string-to-local-datetime/4770688#4770688 # Improve: # add premiumbool? add gamemvp? # add player country? from __future__ import print_function import argparse, datetime, os, sys, subprocess, re, urllib, requests, pandas as pd import parsedatetime as pdt from distutils.spawn import find_executable from pytz import timezone as tz sys.path.append("/usr/share/bgscripts/py") from bgs import debuglev, eprint from bs4 import BeautifulSoup vooblystatspyversion="2020-02-02c" # Define functions # Default default variables today = datetime.date.today().isoformat() # THEORY: when 2 people play the same color, the second player is assigned civ_number+(len(civ_numbers)) civ_dict = {'1':'Britons','2':'Franks','3':'Goths','4':'Teutons','5':'Japanese','6':'Chinese','7':'Byzantines', '8':'Persians', '9':'Saracens','10':'Turks','11':'Vikings','12':'Mongols','13':'Celts','14':'Spanish', '15':'Aztecs','16':'Mayans','17':'Huns','18':'Koreans','19':'Italians', '20':'Indians','21':'Incas','22':'Magyars','23':'Slavs','24':'Portuguese','25':'Ethiopians', '26':'Malians','27':'Berbers','28':'Khmer','29':'Malay','30':'Burmese','31':'Vietnamese', '32':'Britons2', '33':'Franks2', '34':'Goths2', '35':'Teutons2', '36':'Japanese2', '37':'Chinese2', '38':'Byzantines2', '39':'Persians2', '40':'Saracens2', '41':'Turks2', '42':'Vikings2', '43':'Mongols2', '44':'Celts2', '45':'Spanish2', '46':'Aztecs2', '47':'Mayans2', '48':'Huns2', '49':'Koreans2', '50':'Italians', '51':'Indians2', '52':'Incas2', '53':'Magyars2', '54':'Slavs2', '55':'Portuguese2', '56':'Ethiopians2', '57':'Malians2', '58':'Berbers2', '59':'Khmer2', '60':'Malay2', '61':'Burmese2', '62':'Vietnamese2', } color_dict = { "0054A6": "blue", "FF0000": "red", "FFFF00": "yellow", "00A651": "green", "00FFFF": "cyan", "92278F": "purple", "C0C0C0": "gray", "FF8000": "orange", } # Parse parameters parser = argparse.ArgumentParser(description="Pull game stats from voobly") #aoriparam = parser.add_mutually_exclusive_group() #aoriparam.add_argument("-i", "--installed", action='store_true', help='Default value.') #aoriparam.add_argument("-a", "--available", action='store_true') #parser.add_argument("-r", "--refresh", action='store_true', help='Force a refresh of an existing file for today.') #parser.add_argument("searchstring", nargs='*') parser.add_argument("-d","--debug", nargs='?', default=0, type=int, choices=range(0,11), help="Set debug level.") parser.add_argument("--start", required=True,type=int, help="Set starting game number.") parser.add_argument("--end", required=True,type=int, help="Set ending game number.") parser.add_argument("--gameid", type=int, help="Fetch data for a specific matchid") parser.add_argument("--save", action='store_true', help="Show page text instead of parsing") parser.add_argument("-V","--version", action="version", version="%(prog)s " + vooblystatspyversion) args = parser.parse_args() debuglevel=0 if args.debug is None: # -d was used but no value provided debuglevel = 10 elif args.debug: debuglevel = args.debug #if debuglev(10,debuglevel): print(searchstring) # Determine filename #thisfile = fileprefix + "." + aori + "." + today + ".log" #if debuglev(5,debuglevel): eprint("Using file " + thisfile) # Ensure the ~/.dli directory exists #if not os.path.exists(outdir): # os.makedirs(outdir) def login_session(username, password): with requests.Session() as s: s.get('https://www.voobly.com/login') form = {'username': username, 'password': password} s.post('https://www.voobly.com/login/auth', data=form) return s def get_game_page(session, gameid): a=session.get("https://www.voobly.com/match/view/" + str(gameid) + "/Match-Details") return a.text def match(soup): # check the player numbers team_list = [] for i in soup.find_all(name='span', attrs={'style': 'font-size:11px; color:#82909D'}): team_list.append(i.text[0]) # the previous guy only cared about equal number of winner count and loser count #if team_list.count('N') != team_list.count('T'): # return False table = soup.find_all(name='td',attrs={'width':'50%','valign': 'top'})[0].find_all('table')[0] table_data = [[cell.text for cell in row("td")] for row in table("tr")] table_data = [x for x in table_data if x != ['']] match_dict = dict(table_data) match_dict['Win'] = [] match_dict['Loss'] = [] for i in soup.find_all('a'): if "ladder/" in i.get('href'): match_dict['Ladder'] = i.text gameid = match_dict["Match Details"].lstrip("#") #eprint(gameid) return match_dict def player(match, soup): player = [['ID','Name','Clan','New rating','Winbool','Change','Civilization','Team','Overall','Military','Economy','Technology','Society']] player_num = int(match['Players:']) table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[0] key = 'https://voobly.com/profile/' for i in table.find_all('a'): #this is printing out all id if key in i.get('href'): num = int(''.join(re.findall('[0-9]',i.get('href')))) #print(int(''.join(re.findall('[0-9]',i.get('href'))))) player.append([num]) # new way to extract name, from the alternate text from the images counts = 1 player_counts = 0 x=0 found_names = [] for k in soup.find_all('td'): #for i in soup.find_all(name='img'): for i in k.find_all('img'): #print("FOO",i) x += 1 # always need to skip the first two images with alt text if x > 2: for j in re.findall("alt=\"[^\"]+\"",str(i)): name = j.split('"')[1] #print("investigating",counts," using name ",name) if "Age of Empires II: The Conquerors" != name and counts < min(9, player_num + 1) and name not in found_names: #print("found player number",counts," using name ",name) player_counts += 1 player[counts].append(name) found_names.append(name) counts+=1 # get clan tags # this is not always sufficient to get names, because of gameid 20365401 Tic@voobly counts = 1 for i in table.find_all('a'): #this is printing out all name if key in i.get('href'): #player_counts += 1 #name = i.contents[0] #player[counts].append(name) try: if re.match("^\[.*]$",i.previous_element): player[counts].append(i.previous_element) # clan else: player[counts].append("") # no clan name except: player[counts].append("") # no clan name counts+=1 #print(i.contents[0]) # source error can happen where the page lists 50 players! if player_num > (counts-1): print("[WARNING] readjusting player_num from",player_num,"to ",(counts-1)) player_num = (counts-1) # fetch per-player rating info counts = 1 for i in soup.find_all('span'): x=0 for item in i.find_all("b"): x += 1 # for a regular game, it is this: #if counts/2 != int(counts/2): if 3 == x: if 0 < int(i.find_all("b")[1].text): # for left column, 0=new rating, 1=points, 2=team player[counts].append(i.find_all("b")[0].text) # team number is basically a boolean for "didwin" player[counts].append(i.find_all("b")[2].text == "1") else: # for right column, 0=team, 1=points, 2=new rating player[counts].append(i.find_all("b")[2].text) player[counts].append(i.find_all("b")[0].text == "1") # always include points player[counts].append(i.find_all("b")[1].text) counts += 1 if counts >= 9: break counts = 1 key = '/res/games/AOC/civs/' for i in soup.find_all('img'): #this is printing out all civ if key in i.get('src'): civ = str(''.join(x for x in i.get('src') if x.isdigit())) player[counts].append(civ_dict[civ]) counts += 1 counts = 1 #print(player_num) # DEBUG1 for i in range(player_num): #print("counts:",counts,"player:",player[i]) #DEBUG1 if i >= (player_num/2): #print(2) player[counts].append(2) counts += 1 else: #print(1) player[counts].append(1) counts += 1 counts = 1 for i in range(player_num): player[counts].append([]) player[counts].append([]) player[counts].append([]) player[counts].append([]) player[counts].append([]) counts += 1 # player #for item in player[8]: # print(item) df = pd.DataFrame(player[1:],columns=player[0]) player_dict = df.to_dict("index") player_dict = list(player_dict.values()) return player_dict def score(soup): score = [['Color','Military Score','Economy Score','Technology Score','Society Score','Total']] lst = [] count = 0 playercount = 1 table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[0] x=0 hascolor=0 for i in table.find_all('center')[5:]: x += 1 # player color. This is very inefficient but it works, so I'm stopping. Because the main loop in this function is dependent on table.find_all('center'), but I have to loop over a different set of things, this needs some crazy setup. counts=0 for j in table.find_all(name='div',string=''): if "" == j.text: counts += 1 if counts == int((x-1)/5)+1: color="" #print(j.attrs["style"].split()) try: color = str(color_dict[j.attrs["style"].split()[1].lstrip("#").rstrip(";")]) if "padding:" == color: color = "nocolor" except: color = "nocolor" if hascolor != 1: lst.append(color) hascolor = 1 break if hascolor == 1: break if hascolor == 1: break hascolor = 1 if i.find('div'): #print(i.find('div').contents[0].replace(',','')) num = i.find('div').contents[0].replace(',','') lst.append(num) count += 1 else: #print(i.contents[0].replace(',','')) num = i.contents[0].replace(',','') lst.append(num) count += 1 if count == 5: score.append(lst) lst = [] count = 0 hascolor=0 playercount += 1 df = pd.DataFrame(score[1:],columns = score[0]) score_dict = df.to_dict("index") score_dict = list(score_dict.values()) return score_dict def military(soup): military = [['Unit Killed','Unit Lost','Building Razed','Building Lost','Units Converted']] table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[1] lst = [] count = 0 for i in table.find_all('center')[5:]: if i.find('div'): num = i.find('div').contents[0].replace(',','') lst.append(num) count += 1 #print(i.find('div').contents[0].replace(',','')) else: num = i.contents[0].replace(',','') lst.append(num) count += 1 #print(i.contents[0].replace(',','')) if count == 5: military.append(lst) lst = [] count = 0 df = pd.DataFrame(military[1:],columns = military[0]) mil_dict = df.to_dict("index") mil_dict = list(mil_dict.values()) return mil_dict def economy(soup): economy = [['Food','Wood','Stone','Gold','Trade','Received','Sent']] table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[2] lst = [] count = 0 for i in table.find_all('center')[7:]: if i.find('div'): num = i.find('div').contents[0].replace(',','') lst.append(num) count += 1 #print(i.find('div').contents[0].replace(',','')) else: num = i.contents[0].replace(',','') lst.append(num) count += 1 #print(i.contents[0].replace(',','')) if count == 7: economy.append(lst) lst = [] count = 0 df = pd.DataFrame(economy[1:],columns = economy[0]) eco_dict = df.to_dict("index") eco_dict = list(eco_dict.values()) return eco_dict def tech(soup): technology = [['Feudal Time','Castle Time','Imperial Time','Map Explored','Research Count','Research Percentage']] table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[3] lst = [] count = 0 for i in table.find_all('center')[6:]: if i.find('div'): num = i.find('div').contents[0].replace(',','') lst.append(num) count += 1 #print(i.find('div').contents[0].replace(',','')) else: num = i.contents[0].replace(',','') lst.append(num) count += 1 #print(i.contents[0].replace(',','')) if count == 6: technology.append(lst) lst = [] count = 0 df = pd.DataFrame(technology[1:],columns = technology[0]) tech_dict = df.to_dict("index") tech_dict = list(tech_dict.values()) return tech_dict def society(soup): society = [['Total Wonders','Total Castles','Relic Capture','Relic Gold','Villager High']] table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[4] lst = [] count = 0 for i in table.find_all('center')[5:]: if i.find('div'): num = i.find('div').contents[0].replace(',','') lst.append(num) count += 1 #print(i.find('div').contents[0].replace(',','')) else: num = i.contents[0].replace(',','') lst.append(num) count += 1 #print(i.contents[0].replace(',','')) if count == 5: society.append(lst) lst = [] count = 0 df = pd.DataFrame(society[1:],columns = society[0]) soc_dict = df.to_dict("index") soc_dict = list(soc_dict.values()) return soc_dict def combine_orig(match,player,score,military,economy,tech,society): for i in range(len(player)): player[i]['Overall'] = score[i] player[i]['Military'] = military[i] player[i]['Economy'] = economy[i] player[i]['Technology'] = tech[i] player[i]['Society'] = society[i] win = [i for i in player if i['Team'] == 1] loss = [i for i in player if i['Team'] == 2] match['Win'] = win match['Loss'] = loss return match def combine(match,player,score,military,economy,tech,society): # this is the new combine, really parse_game_page gameid = match["Match Details"].lstrip("#") dateplayed = match["Date Played:"] cal = pdt.Calendar() dto, _ = cal.parseDT(datetimeString=dateplayed, tzinfo=tz("US/Eastern")) add_hours = int((str(dto)[-6:])[:3]) dateplayed = (datetime.timedelta(hours=-add_hours) + dto).strftime('%Y-%m-%dT%H:%M:%SZ') # do not use match rating mapname = match["Map:"] duration = match["Duration:"] playercount = match["Players:"] mod = match["Game Mod:"] ladder = match["Ladder"] # print csv output for game game_line = ( "GAME," + gameid + "," + dateplayed + "," + mapname + "," + duration + "," + playercount + "," + mod + "," + ladder + "," ) if False: print(game_line) # build player csv lines player_line = [] for i in range(len(player)): tp = player[i] ts = score[i] tm = military[i] te = economy[i] tt = tech[i] tc = society[i] #print(tp) #print(ts) player_line.append( "PLAYER," + str(gameid) + "," + str(tp["ID"]) + "," + str(tp["Name"]) + "," + str(ts["Color"]) + "," + str(tp["Clan"]) + "," + str(tp["New rating"]) + "," + str(tp["Change"]) + "," + str(tp["Winbool"]) + "," + str(tp["Civilization"]) + "," + str(ts["Military Score"]) + "," + str(ts["Economy Score"]) + "," + str(ts["Technology Score"]) + "," + str(ts["Society Score"]) + "," + str(ts["Total"]) + "," + str(tm["Unit Killed"]) + "," + str(tm["Unit Lost"]) + "," + str(tm["Building Razed"]) + "," + str(tm["Building Lost"]) + "," + str(tm["Units Converted"]) + "," + str(te["Food"]) + "," + str(te["Wood"]) + "," + str(te["Stone"]) + "," + str(te["Gold"]) + "," + str(te["Trade"]) + "," + str(te["Received"]) + "," + str(te["Sent"]) + "," + str(tt["Feudal Time"]) + "," + str(tt["Castle Time"]) + "," + str(tt["Imperial Time"]) + "," + str(tt["Map Explored"]) + "," + str(tt["Research Count"]) + "," + str(tt["Research Percentage"]) + "," + str(tc["Total Wonders"]) + "," + str(tc["Total Castles"]) + "," + str(tc["Relic Capture"]) + "," + str(tc["Relic Gold"]) + "," + str(tc["Villager High"]) + "," ) if False: print(player_line[i]) response = game_line for i in player_line: response += "\n" + i return response def parse_game_page(page_text): soup = BeautifulSoup(page_text,"html.parser") try: if "Page Not Found" == soup.find(name="div",class_="page-title").text or "Age of Empires II: The Conquerors" != soup.find("h3").text: return "invalid page" except: foo = None matchid = soup.find_all("a")[19].get('href').split('/')[3] # skip this match if it has a computer player for i in soup.find_all("td"): if re.match(".*\(Computer\).*",i.text): return "[ERROR] has computer player: " + matchid # capture only aoc if "Age of Empires II: The Conquerors" != soup.find("h3").text: return "[ERROR] not aoc: " + matchid #parse_game_page(page) game = match(soup) play = player(game, soup) sc = score(soup) mil = military(soup) eco = economy(soup) tec = tech(soup) soc = society(soup) return combine(game,play,sc,mil,eco,tec,soc) # MAIN session = login_session("brainpinky","pinkyBrain") # 19914658 if args.gameid: a = get_game_page(session,args.gameid).encode('latin-1','replace') if args.save: print(a.encode('utf-8')) else: print(parse_game_page(a)) sys.exit(0) # span should be 19914650 to 21260965 # main loop for i in range(args.start,args.end): a = get_game_page(session,i).encode('latin-1','replace') print(parse_game_page(a))