From b056c0f6d7099251ef1015783b55354636117fc3 Mon Sep 17 00:00:00 2001
From: B Stack
Date: Mon, 3 Feb 2020 11:13:44 -0500
Subject: add python version, and some example wrapper scripts

---
 .gitignore     |   2 +
 vooblystats.py | 555 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 wrap2.sh       |  17 ++
 wrapper.sh     |  22 +++
 4 files changed, 596 insertions(+)
 create mode 100755 vooblystats.py
 create mode 100755 wrap2.sh
 create mode 100755 wrapper.sh

diff --git a/.gitignore b/.gitignore
index 4ddf8bb..82bdcec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@ cookies
 old
 *.conf
 *.swp
+*.html
+files/
diff --git a/vooblystats.py b/vooblystats.py
new file mode 100755
index 0000000..b97b3c8
--- /dev/null
+++ b/vooblystats.py
@@ -0,0 +1,555 @@
+#!/usr/bin/env python3
+# vim: shiftwidth=4 softtabstop=4 tabstop=4
+# File: vooblystats.py
+# Author: bgstack15@gmail.com
+# Startdate: 2020-02-01 18:55
+# Title: Script to Pull Game Data from Voobly
+# Purpose: Pull stats from Voobly for data visualization for 30_Turbo_Swag
+# History:
+# Usage:
+# Reference:
+#    ripped primarily from https://github.com/nathankong97/voobly-parse-aoe2-game-data
+#    date conversion:
+#       https://stackoverflow.com/questions/4615250/convert-relative-date-string-to-absolute-date/4615451#4615451
+#       https://github.com/bear/parsedatetime
+#       https://stackoverflow.com/questions/4770297/convert-utc-datetime-string-to-local-datetime/4770688#4770688
+# Improve:
+#    add premiumbool? add gamemvp?
+#    add player country?
+from __future__ import print_function
+import argparse
+import datetime
+import os
+import sys
+import subprocess
+import re
+import urllib
+import requests
+import pandas as pd
+import parsedatetime as pdt
+from distutils.spawn import find_executable
+from pytz import timezone as tz
+# bgs lives outside the default module search path, so extend the path first.
+sys.path.append("/usr/share/bgscripts/py")
+from bgs import debuglev, eprint
+from bs4 import BeautifulSoup
+
+vooblystatspyversion="2020-02-02c"
+
+# Define functions
+
+# Default default variables
+today = datetime.date.today().isoformat()
+
+# Civilization names in the order of their number (1-based).
+_base_civs = [
+    'Britons','Franks','Goths','Teutons','Japanese','Chinese','Byzantines',
+    'Persians','Saracens','Turks','Vikings','Mongols','Celts','Spanish',
+    'Aztecs','Mayans','Huns','Koreans','Italians',
+    'Indians','Incas','Magyars','Slavs','Portuguese','Ethiopians',
+    'Malians','Berbers','Khmer','Malay','Burmese','Vietnamese',
+]
+
+# Maps the civ number (as a string) to the civilization name.
+civ_dict = {str(n): name for n, name in enumerate(_base_civs, 1)}
+# THEORY: when 2 people play the same color, the second player is assigned civ_number+(len(civ_numbers))
+# Those numbers are generated from the base list with a "2" suffix, so every
+# entry follows the same rule (the hand-written table had '50':'Italians').
+civ_dict.update(
+    {str(n + len(_base_civs)): name + "2" for n, name in enumerate(_base_civs, 1)}
+)
+
+# Maps a hex color code (no leading "#") to a color name.
+color_dict = {
+    "0054A6": "blue",
+    "FF0000": "red",
+    "FFFF00": "yellow",
+    "00A651": "green",
+    "00FFFF": "cyan",
+    "92278F": "purple",
+    "C0C0C0": "gray",
+    "FF8000": "orange",
+}
+
+# Parse parameters
+parser = argparse.ArgumentParser(description="Pull game stats from voobly")
+parser.add_argument("-d","--debug", nargs='?', default=0, type=int, choices=range(0,11), help="Set debug level.")
+parser.add_argument("--start", type=int, help="Set starting game number.")
+parser.add_argument("--end", type=int, help="Set ending game number.")
+parser.add_argument("--gameid", type=int, help="Fetch data for a specific matchid")
+parser.add_argument("--save", action='store_true', help="Show page text instead of parsing")
+parser.add_argument("-V","--version", action="version", version="%(prog)s " + vooblystatspyversion)
+
+args = parser.parse_args()
+
+# --start/--end only drive the range loop at the bottom of this file, so they
+# are checked here rather than with required=True, which also demanded them
+# for a single --gameid run.
+if not args.gameid and (args.start is None or args.end is None):
+    parser.error("--start and --end are required unless --gameid is given")
+
+debuglevel=0
+if args.debug is None:
+    # -d was used but no value provided
+    debuglevel = 10
+elif args.debug:
+    debuglevel = args.debug
+
+# Heading text that identifies a page this script parses.
+AOC_TITLE = "Age of Empires II: The Conquerors"
+
+# Attributes shared by the statistics tables on a match page.
+STAT_TABLE_ATTRS = {'width':'100%','border': '0'}
+
+# Column names of each statistics table; also the CSV field order in combine().
+SCORE_COLUMNS = ['Military Score','Economy Score','Technology Score','Society Score','Total']
+MILITARY_COLUMNS = ['Unit Killed','Unit Lost','Building Razed','Building Lost','Units Converted']
+ECONOMY_COLUMNS = ['Food','Wood','Stone','Gold','Trade','Received','Sent']
+TECH_COLUMNS = ['Feudal Time','Castle Time','Imperial Time','Map Explored','Research Count','Research Percentage']
+SOCIETY_COLUMNS = ['Total Wonders','Total Castles','Relic Capture','Relic Gold','Villager High']
+
+def login_session(username, password):
+    """Open a requests session, post the login form, and return the session.
+
+    The response of the login request is not inspected, so the caller gets a
+    session back whether or not the credentials were accepted.
+    """
+    # Deliberately not a "with" block: the session is handed to the caller,
+    # and leaving a with block would close it before it is returned.
+    s = requests.Session()
+    s.get('https://www.voobly.com/login')
+    form = {'username': username, 'password': password}
+    s.post('https://www.voobly.com/login/auth', data=form)
+    return s
+
+def get_game_page(session, gameid):
+    """Return the text of the Match-Details page for gameid."""
+    a=session.get("https://www.voobly.com/match/view/" + str(gameid) + "/Match-Details")
+    return a.text
+
+def _rows_to_dicts(columns, rows):
+    """Return one dict per row, keyed by columns, built through a DataFrame."""
+    df = pd.DataFrame(rows,columns=columns)
+    return list(df.to_dict("index").values())
+
+def _cell_value(cell):
+    """Return the first content item of a <center> cell with commas removed.
+
+    When the cell contains a <div>, the value is read from that div instead.
+    """
+    inner = cell.find('div')
+    source = cell if inner is None else inner
+    return source.contents[0].replace(',','')
+
+def _stat_rows(table, width):
+    """Return the <center> cell values of table as rows of width values.
+
+    The first width cells are skipped (presumably the header row), and a
+    trailing incomplete row is dropped.
+    """
+    cells = [_cell_value(c) for c in table.find_all('center')[width:]]
+    return [cells[n:n + width] for n in range(0, len(cells) - width + 1, width)]
+
+def _stat_table(soup, index, columns):
+    """Parse the index-th statistics table into a list of per-row dicts."""
+    table = soup.find_all(name='table',attrs=STAT_TABLE_ATTRS)[index]
+    return _rows_to_dicts(columns, _stat_rows(table, len(columns)))
+
+def match(soup):
+    """Return the match details table as a dict.
+
+    Keys are the label cells exactly as they appear on the page (combine()
+    reads 'Match Details', 'Date Played:', 'Map:', 'Duration:', 'Players:'
+    and 'Game Mod:'). 'Win' and 'Loss' start as empty lists, and 'Ladder' is
+    the text of the last link whose href contains "ladder/", or "" when there
+    is none.
+    """
+    table = soup.find_all(name='td',attrs={'width':'50%','valign': 'top'})[0].find_all('table')[0]
+    table_data = [[cell.text for cell in row("td")]
+                  for row in table("tr")]
+    table_data = [x for x in table_data if x != ['']]
+
+    match_dict = dict(table_data)
+    match_dict['Win'] = []
+    match_dict['Loss'] = []
+    # Default so combine() does not fail on a page without a ladder link.
+    match_dict['Ladder'] = ""
+
+    for i in soup.find_all('a'):
+        # Anchors without an href return None from get().
+        if "ladder/" in (i.get('href') or ""):
+            match_dict['Ladder'] = i.text
+
+    return match_dict
+
+def player(match, soup):
+    """Return one dict per player found in the first statistics table.
+
+    match is the dict from match(); only match['Players:'] is read. Each
+    dict has the keys ID, Name, Clan, New rating, Winbool, Change,
+    Civilization, Team, Overall, Military, Economy, Technology and Society.
+    The last five are empty lists that combine_orig() fills in.
+    """
+    columns = ['ID','Name','Clan','New rating','Winbool','Change','Civilization','Team','Overall','Military','Economy','Technology','Society']
+    player_num = int(match['Players:'])
+    table = soup.find_all(name='table',attrs=STAT_TABLE_ATTRS)[0]
+    key = 'https://voobly.com/profile/'
+
+    # One row per profile link: id (all digits of the href), name, clan.
+    rows = []
+    for link in table.find_all('a'):
+        href = link.get('href') or ""
+        if key not in href:
+            continue
+        clan = ""  # no clan name
+        try:
+            if re.match(r"^\[.*]$",link.previous_element):
+                clan = link.previous_element
+        except TypeError:
+            # previous_element is not text (a tag, or nothing at all)
+            pass
+        rows.append([int(''.join(re.findall('[0-9]',href))), link.contents[0], clan])
+
+    # source error can happen where the page lists 50 players!
+    if player_num > len(rows):
+        print("[WARNING] readjusting player_num from",player_num,"to ",len(rows))
+        player_num = len(rows)
+
+    # fetch per-player rating info: spans that hold exactly three <b> values
+    slot = 0
+    for span in soup.find_all('span'):
+        bold = span.find_all("b")
+        if 3 != len(bold):
+            continue
+        # at most 8 players, and never more spans than players found
+        if slot >= 8 or slot >= len(rows):
+            break
+        if 0 < int(bold[1].text):
+            # for left column, 0=new rating, 1=points, 2=team
+            rating = bold[0].text
+            # team number is basically a boolean for "didwin"
+            won = bold[2].text == "1"
+        else:
+            # for right column, 0=team, 1=points, 2=new rating
+            rating = bold[2].text
+            won = bold[0].text == "1"
+        # always include points
+        rows[slot].extend([rating, won, bold[1].text])
+        slot += 1
+
+    # civilization: digits of every civ icon path, in page order
+    slot = 0
+    civ_key = '/res/games/AOC/civs/'
+    for img in soup.find_all('img'):
+        src = img.get('src') or ""
+        if civ_key not in src:
+            continue
+        if slot >= len(rows):
+            break
+        civ = ''.join(x for x in src if x.isdigit())
+        # an unknown number is kept as-is instead of aborting the game
+        rows[slot].append(civ_dict.get(civ, civ))
+        slot += 1
+
+    # first half of the players is team 1, second half team 2, followed by
+    # the five placeholders for the per-category statistics
+    for n in range(player_num):
+        rows[n].append(2 if n >= (player_num/2) else 1)
+        rows[n].extend([[], [], [], [], []])
+
+    return _rows_to_dicts(columns, rows)
+
+def _swatch_color(swatches, index):
+    """Return the color name for the index-th empty div, or "nocolor".
+
+    The color code is the second whitespace-separated word of the style
+    attribute with "#" and ";" stripped, looked up in color_dict.
+    """
+    try:
+        code = swatches[index].attrs["style"].split()[1].lstrip("#").rstrip(";")
+        return str(color_dict[code])
+    except (KeyError, IndexError):
+        # no such div, no usable style attribute, or an unknown color code
+        return "nocolor"
+
+def score(soup):
+    """Return one dict per row of the first statistics table.
+
+    Keys are 'Color' plus SCORE_COLUMNS. The color of row n comes from the
+    n-th div of that table whose text is empty.
+    """
+    table = soup.find_all(name='table',attrs=STAT_TABLE_ATTRS)[0]
+    # collected once instead of once per cell
+    swatches = [j for j in table.find_all(name='div',string='') if "" == j.text]
+    rows = [[_swatch_color(swatches, n)] + values
+            for n, values in enumerate(_stat_rows(table, len(SCORE_COLUMNS)))]
+    return _rows_to_dicts(['Color'] + SCORE_COLUMNS, rows)
+
+def military(soup):
+    """Return one dict per row of the second statistics table (MILITARY_COLUMNS)."""
+    return _stat_table(soup, 1, MILITARY_COLUMNS)
+
+def economy(soup):
+    """Return one dict per row of the third statistics table (ECONOMY_COLUMNS)."""
+    return _stat_table(soup, 2, ECONOMY_COLUMNS)
+
+def tech(soup):
+    """Return one dict per row of the fourth statistics table (TECH_COLUMNS)."""
+    return _stat_table(soup, 3, TECH_COLUMNS)
+
+def society(soup):
+    """Return one dict per row of the fifth statistics table (SOCIETY_COLUMNS)."""
+    return _stat_table(soup, 4, SOCIETY_COLUMNS)
+
+def combine_orig(match,player,score,military,economy,tech,society):
+    """Nest the per-category dicts into each player and split them by team.
+
+    Modifies the player dicts and match in place and returns match, with
+    match['Win'] holding the team 1 players and match['Loss'] the team 2
+    players.
+    """
+    for i, entry in enumerate(player):
+        entry['Overall'] = score[i]
+        entry['Military'] = military[i]
+        entry['Economy'] = economy[i]
+        entry['Technology'] = tech[i]
+        entry['Society'] = society[i]
+    match['Win'] = [p for p in player if p['Team'] == 1]
+    match['Loss'] = [p for p in player if p['Team'] == 2]
+    return match
+
+def combine(match,player,score,military,economy,tech,society):
+    """Return the game as CSV text: one GAME line, then one PLAYER line each.
+
+    Every line ends with a trailing comma. Fields are not quoted, so a comma
+    inside a value (for example a player name) shifts the columns.
+    """
+    # this is the new combine, really parse_game_page
+    gameid = match["Match Details"].lstrip("#")
+    cal = pdt.Calendar()
+    # NOTE(review): the page date is interpreted as US/Eastern - confirm
+    dto, _ = cal.parseDT(datetimeString=match["Date Played:"], tzinfo=tz("US/Eastern"))
+    # convert to UTC through the datetime itself rather than by slicing the
+    # offset out of its string form
+    dateplayed = dto.astimezone(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
+
+    # do not use match rating
+    game_fields = [
+        "GAME",
+        gameid,
+        dateplayed,
+        match["Map:"],
+        match["Duration:"],
+        match["Players:"],
+        match["Game Mod:"],
+        match["Ladder"],
+    ]
+    lines = [",".join(game_fields) + ","]
+
+    # build player csv lines
+    for i, tp in enumerate(player):
+        fields = [
+            "PLAYER",
+            str(gameid),
+            str(tp["ID"]),
+            tp["Name"],
+            score[i]["Color"],
+            tp["Clan"],
+            str(tp["New rating"]),
+            str(tp["Change"]),
+            str(tp["Winbool"]),
+            tp["Civilization"],
+        ]
+        fields += [str(score[i][k]) for k in SCORE_COLUMNS]
+        fields += [str(military[i][k]) for k in MILITARY_COLUMNS]
+        fields += [str(economy[i][k]) for k in ECONOMY_COLUMNS]
+        fields += [str(tech[i][k]) for k in TECH_COLUMNS]
+        fields += [str(society[i][k]) for k in SOCIETY_COLUMNS]
+        lines.append(",".join(fields) + ",")
+
+    return "\n".join(lines)
+
+def parse_game_page(page_text):
+    """Parse one match page and return its CSV text or a short status string.
+
+    Returns "invalid page" for a "Page Not Found" page, a page without an
+    <h3> heading or without the link the match id is read from, and a page
+    whose heading is not AOC_TITLE; "[ERROR] ..." for a game with a computer
+    player or a non-AoC game; otherwise the text built by combine().
+    """
+    soup = BeautifulSoup(page_text,"html.parser")
+    title = soup.find(name="div",class_="page-title")
+    heading = soup.find("h3")
+    if title is not None and "Page Not Found" == title.text:
+        return "invalid page"
+    if heading is None:
+        return "invalid page"
+    if title is not None and AOC_TITLE != heading.text:
+        return "invalid page"
+
+    # NOTE(review): assumes the 20th link on the page is the match link - confirm
+    try:
+        matchid = soup.find_all("a")[19].get('href').split('/')[3]
+    except (IndexError, AttributeError):
+        return "invalid page"
+
+    # skip this match if it has a computer player
+    for cell in soup.find_all("td"):
+        if re.match(r".*\(Computer\).*",cell.text):
+            return "[ERROR] has computer player: " + matchid
+
+    # capture only aoc
+    if AOC_TITLE != heading.text:
+        return "[ERROR] not aoc: " + matchid
+
+    game = match(soup)
+    play = player(game, soup)
+    sc = score(soup)
+    mil = military(soup)
+    eco = economy(soup)
+    tec = tech(soup)
+    soc = society(soup)
+    return combine(game,play,sc,mil,eco,tec,soc)
+
+# MAIN
+# NOTE(review): credentials are committed in this file. VOOBLY_USERNAME and
+# VOOBLY_PASSWORD override them when set; the literals remain only as the
+# previous default and should be removed.
+session = login_session(
+    os.environ.get("VOOBLY_USERNAME", "brainpinky"),
+    os.environ.get("VOOBLY_PASSWORD", "pinkyBrain"),
+)
+
+# 19914658
+
+if args.gameid:
+    page = get_game_page(session,args.gameid)
+    if args.save:
+        # the page is already text; calling .encode() on the encoded bytes
+        # fails on python3
+        print(page)
+    else:
+        print(parse_game_page(page.encode('latin-1','replace')))
+    sys.exit(0)
+
+# span should be 19914650 to 21260965
+# main loop; args.end itself is not fetched
+for i in range(args.start,args.end):
+    a = get_game_page(session,i).encode('latin-1','replace')
+    try:
+        print(parse_game_page(a))
+    except Exception as e:
+        # one unparsable page must not end a run over thousands of games
+        print("[ERROR] failed to parse game " + str(i) + ": " + repr(e), file=sys.stderr)
diff --git a/wrap2.sh b/wrap2.sh
new file mode 100755
index 0000000..f0d5dcd
--- /dev/null
+++ b/wrap2.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+# end value: 21260965
+test -z "${ENDVALUE}" && ENDVALUE=21260965
+
+# run1 start value: 19899000
+# run2 start value: 19903845
+test -z "${STARTVALUE}" && STARTVALUE=19903845
+
+test -z "${LOGFILE}" && LOGFILE=~/dev/vooblystats/files/run2.csv
+
+echo "" > "${LOGFILE}"
+{
+   printf "%s %s\n" "START" "$( date -u "+%FT%TZ" )"
+   ./vooblystats.py --start "${STARTVALUE}" --end "${ENDVALUE}"
+   printf "%s %s\n" "END" "$( date -u "+%FT%TZ" )"
+} 2>&1 | tee -a "${LOGFILE}"
diff --git a/wrapper.sh b/wrapper.sh
new file mode 100755
index 0000000..abdfc1c
--- /dev/null
+++ b/wrapper.sh
@@ -0,0 +1,22 @@
+OUTDIR=~/dev/vooblystats/files
+mkdir -p "${OUTDIR}"
+
+# 18 games per minute, approximately
+# cover this sequence 19871439 21184314
+
+# need to fork vooblystats.sh 152 times!
+
+startgameid=19871439
+endgameid=21184314
+span=8640 # games per 8 hours
+x=0
+count=152
+
+while test $x -lt $count ;
+do
+   x=$(( x + 1 )) # 1-based, used for the file name; chunk offsets below use x-1 so the first chunk starts at startgameid
+   startx=$(( startgameid + (x - 1) * span ))
+   endx=$(( startx + span - 1 ))
+   echo "call VS_COOKIEFILE=\$(mktemp) vooblystats.sh ${startx} ${endx} > ${OUTDIR}/gameset-${x}.csv"
+   #VS_COOKIEFILE="$(mktemp)" ~/dev/vooblystats/vooblystats.sh ${startx} ${endx} > ${OUTDIR}/gameset-${x}.csv &
+done
--
cgit