aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 2
-rwxr-xr-x vooblystats.py 555
-rwxr-xr-x wrap2.sh 17
-rwxr-xr-x wrapper.sh 22
4 files changed, 596 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index 4ddf8bb..82bdcec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@ cookies
old
*.conf
*.swp
+*.html
+files/
diff --git a/vooblystats.py b/vooblystats.py
new file mode 100755
index 0000000..b97b3c8
--- /dev/null
+++ b/vooblystats.py
@@ -0,0 +1,555 @@
+#!/usr/bin/env python3
+# vim: shiftwidth=4 softtabstop=4 tabstop=4
+# File: vooblystats.py
+# Author: bgstack15@gmail.com
+# Startdate: 2020-02-01 18:55
+# Title: Script to Pull Game Data from Voobly
+# Purpose: Pull stats from Voobly for data visualization for 30_Turbo_Swag
+# History:
+# Usage:
+# Reference:
+# ripped primarily from https://github.com/nathankong97/voobly-parse-aoe2-game-data
+# date conversion:
+# https://stackoverflow.com/questions/4615250/convert-relative-date-string-to-absolute-date/4615451#4615451
+# https://github.com/bear/parsedatetime
+# https://stackoverflow.com/questions/4770297/convert-utc-datetime-string-to-local-datetime/4770688#4770688
+# Improve:
+# add premiumbool? add gamemvp?
+# add player country?
+from __future__ import print_function
+import argparse, datetime, os, sys, subprocess, re, urllib, requests, pandas as pd
+import parsedatetime as pdt
+from distutils.spawn import find_executable
+from pytz import timezone as tz
+sys.path.append("/usr/share/bgscripts/py")
+from bgs import debuglev, eprint
+from bs4 import BeautifulSoup
+
+# Version string of this script; reported by the -V/--version option.
+vooblystatspyversion="2020-02-02c"
+
+# Define functions
+
+# Default variables
+# today: the current local date as an ISO-8601 string (YYYY-MM-DD)
+today = datetime.date.today().isoformat()
+
+# civ_dict maps the civilization number (as a string) to the civilization name.
+# Entries 1-31 are the base civilizations.
+# THEORY: when 2 people play the same color, the second player is assigned civ_number+(len(civ_numbers))
+# so entries 32-62 repeat entries 1-31 in the same order with a "2" suffix.
+civ_dict = {'1':'Britons','2':'Franks','3':'Goths','4':'Teutons','5':'Japanese','6':'Chinese','7':'Byzantines',
+    '8':'Persians', '9':'Saracens','10':'Turks','11':'Vikings','12':'Mongols','13':'Celts','14':'Spanish',
+    '15':'Aztecs','16':'Mayans','17':'Huns','18':'Koreans','19':'Italians',
+    '20':'Indians','21':'Incas','22':'Magyars','23':'Slavs','24':'Portuguese','25':'Ethiopians',
+    '26':'Malians','27':'Berbers','28':'Khmer','29':'Malay','30':'Burmese','31':'Vietnamese',
+    '32':'Britons2',
+    '33':'Franks2',
+    '34':'Goths2',
+    '35':'Teutons2',
+    '36':'Japanese2',
+    '37':'Chinese2',
+    '38':'Byzantines2',
+    '39':'Persians2',
+    '40':'Saracens2',
+    '41':'Turks2',
+    '42':'Vikings2',
+    '43':'Mongols2',
+    '44':'Celts2',
+    '45':'Spanish2',
+    '46':'Aztecs2',
+    '47':'Mayans2',
+    '48':'Huns2',
+    '49':'Koreans2',
+    # 19 (Italians) + 31 = 50; this entry was missing its "2" suffix
+    '50':'Italians2',
+    '51':'Indians2',
+    '52':'Incas2',
+    '53':'Magyars2',
+    '54':'Slavs2',
+    '55':'Portuguese2',
+    '56':'Ethiopians2',
+    '57':'Malians2',
+    '58':'Berbers2',
+    '59':'Khmer2',
+    '60':'Malay2',
+    '61':'Burmese2',
+    '62':'Vietnamese2',
+}
+
+# color_dict maps a hex color value (upper case, without the leading "#")
+# to the name of the player color.
+color_dict = dict((
+    ("0054A6", "blue"),
+    ("FF0000", "red"),
+    ("FFFF00", "yellow"),
+    ("00A651", "green"),
+    ("00FFFF", "cyan"),
+    ("92278F", "purple"),
+    ("C0C0C0", "gray"),
+    ("FF8000", "orange"),
+))
+
+# Parse parameters
+parser = argparse.ArgumentParser(description="Pull game stats from voobly")
+#aoriparam = parser.add_mutually_exclusive_group()
+#aoriparam.add_argument("-i", "--installed", action='store_true', help='Default value.')
+#aoriparam.add_argument("-a", "--available", action='store_true')
+#parser.add_argument("-r", "--refresh", action='store_true', help='Force a refresh of an existing file for today.')
+#parser.add_argument("searchstring", nargs='*')
+parser.add_argument("-d","--debug", nargs='?', default=0, type=int, choices=range(0,11), help="Set debug level.")
+# --start and --end are only needed for a range run. They are validated after
+# parsing so that --gameid can be used on its own.
+parser.add_argument("--start", type=int, help="Set starting game number. Required unless --gameid is given.")
+parser.add_argument("--end", type=int, help="Set ending game number (not included in the run). Required unless --gameid is given.")
+parser.add_argument("--gameid", type=int, help="Fetch data for a specific matchid")
+parser.add_argument("--save", action='store_true', help="Show page text instead of parsing")
+parser.add_argument("-V","--version", action="version", version="%(prog)s " + vooblystatspyversion)
+
+args = parser.parse_args()
+
+# A range run needs both ends of the range; a single-game run needs neither.
+if not args.gameid and (args.start is None or args.end is None):
+    parser.error("--start and --end are required unless --gameid is given")
+
+debuglevel=0
+if args.debug is None:
+    # -d was used but no value provided
+    debuglevel = 10
+elif args.debug:
+    debuglevel = args.debug
+#if debuglev(10,debuglevel): print(searchstring)
+
+# Determine filename
+#thisfile = fileprefix + "." + aori + "." + today + ".log"
+#if debuglev(5,debuglevel): eprint("Using file " + thisfile)
+
+# Ensure the ~/.dli directory exists
+#if not os.path.exists(outdir):
+# os.makedirs(outdir)
+
+def login_session(username, password):
+    """Open a requests session, submit the voobly login form and return the session.
+
+    username, password: values posted as the 'username' and 'password' form fields.
+    Returns the requests.Session used for the login, so that later requests
+    reuse whatever cookies the login set. The login response is not inspected.
+    """
+    # The session is deliberately not wrapped in "with": leaving a with-block
+    # closes the session, and the caller keeps using it after this returns.
+    s = requests.Session()
+    s.get('https://www.voobly.com/login')
+    form = {'username': username, 'password': password}
+    s.post('https://www.voobly.com/login/auth', data=form)
+    return s
+
+def get_game_page(session, gameid):
+    """Fetch the Match-Details page of one game and return the response text.
+
+    session: object providing a requests-style get() method.
+    gameid: match number; it is converted with str() when the URL is built.
+    """
+    url = "".join(["https://www.voobly.com/match/view/", str(gameid), "/Match-Details"])
+    response = session.get(url)
+    return response.text
+
+def match(soup):
+    """Collect the match-level details of a parsed game page into a dict.
+
+    soup: BeautifulSoup document of a Match-Details page.
+    Returns a dict built from the two-cell rows of the first table inside the
+    first <td width="50%" valign="top">, plus empty 'Win' and 'Loss' lists and,
+    when a link whose href contains "ladder/" exists, a 'Ladder' entry holding
+    that link's text (the last such link wins).
+    Raises IndexError when the expected td/table is not present.
+    """
+    # The previous author collected the first letter of every team span here to
+    # compare winner and loser counts. That check was already disabled, and the
+    # leftover loop could fail on an empty span, so it has been removed.
+    table = soup.find_all(name='td',attrs={'width':'50%','valign': 'top'})[0].find_all('table')[0]
+    table_data = [[cell.text for cell in row("td")]
+                  for row in table("tr")]
+    # drop spacer rows that consist of one empty cell
+    table_data = [x for x in table_data if x != ['']]
+
+    match_dict = dict(table_data)
+    match_dict['Win'] = []
+    match_dict['Loss'] = []
+
+    for i in soup.find_all('a'):
+        # an anchor without an href yields None from get(); treat it as "no link"
+        if "ladder/" in (i.get('href') or ''):
+            match_dict['Ladder'] = i.text
+
+    return match_dict
+
+# player(match, soup): build one record per player of a parsed game page.
+#   match: dict from match(); only match['Players:'] is read here.
+#   soup:  BeautifulSoup document of the Match-Details page.
+#   Returns a list of dicts, one per player, keyed by the column names in the
+#   first row of the local "player" list.
+# NOTE(review): leading indentation was collapsed in this rendering of the diff,
+# so the nesting of the loops below cannot be read from the columns; confirm it
+# against the original file before changing any logic.
+def player(match, soup):
+ # row 0 holds the column names; rows 1..n are filled in column by column below
+ player = [['ID','Name','Clan','New rating','Winbool','Change','Civilization','Team','Overall','Military','Economy','Technology','Society']]
+ player_num = int(match['Players:'])
+ table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[0]
+ key = 'https://voobly.com/profile/'
+
+ # pass 1: one new row per profile link; the ID is all digits of the href joined together
+ for i in table.find_all('a'): #this is printing out all id
+ if key in i.get('href'):
+ num = int(''.join(re.findall('[0-9]',i.get('href'))))
+ #print(int(''.join(re.findall('[0-9]',i.get('href')))))
+ player.append([num])
+
+ # pass 2: same links again; append the name (first child of the link) and the clan
+ counts = 1
+ player_counts = 0
+ for i in table.find_all('a'): #this is printing out all name
+ if key in i.get('href'):
+ player_counts += 1
+ name = i.contents[0]
+ player[counts].append(name)
+ # the element just before the link counts as the clan tag when it looks like "[...]";
+ # any error while checking it is treated as "no clan"
+ try:
+ if re.match("^\[.*]$",i.previous_element):
+ player[counts].append(i.previous_element) # clan
+ else:
+ player[counts].append("") # no clan name
+ except:
+ player[counts].append("") # no clan name
+ counts+=1
+ #print(i.contents[0])
+
+ # source error can happen where the page lists 50 players!
+ if player_num > (counts-1):
+ print("[WARNING] readjusting player_num from",player_num,"to ",(counts-1))
+ player_num = (counts-1)
+
+ # fetch per-player rating info
+ # x counts the <b> elements inside each span; spans with 3 of them carry
+ # new rating, points and team, in an order that depends on the column.
+ # NOTE(review): presumably "if 3 == x" runs after the counting loop and
+ # "if counts >= 9" is at span-loop level -- confirm against the original file.
+ counts = 1
+ for i in soup.find_all('span'):
+ x=0
+ for item in i.find_all("b"):
+ x += 1
+ # for a regular game, it is this:
+ #if counts/2 != int(counts/2):
+ if 3 == x:
+ if 0 < int(i.find_all("b")[1].text):
+ # for left column, 0=new rating, 1=points, 2=team
+ player[counts].append(i.find_all("b")[0].text)
+ # team number is basically a boolean for "didwin"
+ player[counts].append(i.find_all("b")[2].text == "1")
+ else:
+ # for right column, 0=team, 1=points, 2=new rating
+ player[counts].append(i.find_all("b")[2].text)
+ player[counts].append(i.find_all("b")[0].text == "1")
+ # always include points
+ player[counts].append(i.find_all("b")[1].text)
+ counts += 1
+ if counts >= 9:
+ break
+
+ # civilization: the digits of each civ icon path are looked up in civ_dict
+ counts = 1
+ key = '/res/games/AOC/civs/'
+ for i in soup.find_all('img'): #this is printing out all civ
+ if key in i.get('src'):
+ civ = str(''.join(x for x in i.get('src') if x.isdigit()))
+ player[counts].append(civ_dict[civ])
+ counts += 1
+
+ # team: the first half of the rows gets 1, the second half gets 2
+ counts = 1
+ #print(player_num) # DEBUG1
+ for i in range(player_num):
+ #print("counts:",counts,"player:",player[i]) #DEBUG1
+ if i >= (player_num/2):
+ #print(2)
+ player[counts].append(2)
+ counts += 1
+ else:
+ #print(1)
+ player[counts].append(1)
+ counts += 1
+
+ # five empty-list placeholders for the Overall, Military, Economy, Technology and Society columns
+ counts = 1
+ for i in range(player_num):
+ player[counts].append([])
+ player[counts].append([])
+ player[counts].append([])
+ player[counts].append([])
+ player[counts].append([])
+ counts += 1
+
+ # player
+ #for item in player[8]:
+ # print(item)
+ # turn the rows into a list of dicts keyed by the column names in player[0]
+ df = pd.DataFrame(player[1:],columns=player[0])
+ player_dict = df.to_dict("index")
+ player_dict = list(player_dict.values())
+ return player_dict
+
+# score(soup): read the per-player rows of the first width=100% border=0 table.
+#   soup: BeautifulSoup document of the Match-Details page.
+#   Returns a list of dicts, one per completed row, keyed by the column names
+#   in the first row of the local "score" list.
+# NOTE(review): leading indentation was collapsed in this rendering of the diff;
+# the nesting and the placement of the break statements in the color lookup
+# below must be confirmed against the original file before changing any logic.
+def score(soup):
+ # row 0 holds the column names: a color followed by five values
+ score = [['Color','Military Score','Economy Score','Technology Score','Society Score','Total']]
+ lst = []
+ count = 0
+ playercount = 1
+ table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[0]
+
+ # x numbers the <center> cells after the first five (presumably the heading cells);
+ # hascolor records whether the color of the row being built has been handled
+ x=0
+ hascolor=0
+ for i in table.find_all('center')[5:]:
+ x += 1
+
+ # player color. This is very inefficient but it works, so I'm stopping. Because the main loop in this function is dependent on table.find_all('center'), but I have to loop over a different set of things, this needs some crazy setup.
+ # int((x-1)/5)+1 is the 1-based row number of cell x; the divs with empty
+ # text are counted until that number is reached, and the color is taken
+ # from the second word of that div's style attribute via color_dict.
+ # Any failure in the lookup results in "nocolor".
+ counts=0
+ for j in table.find_all(name='div',string=''):
+ if "" == j.text:
+ counts += 1
+ if counts == int((x-1)/5)+1:
+ color=""
+ #print(j.attrs["style"].split())
+ try:
+ color = str(color_dict[j.attrs["style"].split()[1].lstrip("#").rstrip(";")])
+ if "padding:" == color: color = "nocolor"
+ except:
+ color = "nocolor"
+ if hascolor != 1:
+ lst.append(color)
+ hascolor = 1
+ break
+ if hascolor == 1:
+ break
+ if hascolor == 1:
+ break
+
+ hascolor = 1
+
+ # the value is the first child of the cell's <div> when there is one,
+ # otherwise of the cell itself; commas are stripped from the number
+ if i.find('div'):
+ #print(i.find('div').contents[0].replace(',',''))
+ num = i.find('div').contents[0].replace(',','')
+ lst.append(num)
+ count += 1
+ else:
+ #print(i.contents[0].replace(',',''))
+ num = i.contents[0].replace(',','')
+ lst.append(num)
+ count += 1
+
+ # five values complete one row; store it and reset the per-row state
+ if count == 5:
+ score.append(lst)
+ lst = []
+ count = 0
+ hascolor=0
+ playercount += 1
+ # turn the rows into a list of dicts keyed by the column names in score[0]
+ df = pd.DataFrame(score[1:],columns = score[0])
+ score_dict = df.to_dict("index")
+ score_dict = list(score_dict.values())
+ return score_dict
+
+def military(soup):
+    """Read the per-player military statistics from a parsed game page.
+
+    soup: BeautifulSoup document of the Match-Details page.
+    Returns a list of dicts, one per complete row of five values, keyed by
+    the column names below. Values are strings with commas removed.
+    """
+    columns = ['Unit Killed','Unit Lost','Building Razed','Building Lost','Units Converted']
+    # the military table is the second width=100% border=0 table on the page
+    stats_table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[1]
+    rows = []
+    current = []
+    # the first five <center> cells are skipped (presumably the column headings)
+    for cell in stats_table.find_all('center')[5:]:
+        inner = cell.find('div')
+        # prefer the nested <div> when the cell has one
+        holder = inner if inner else cell
+        current.append(holder.contents[0].replace(',',''))
+        if len(current) == 5:
+            rows.append(current)
+            current = []
+    frame = pd.DataFrame(rows,columns = columns)
+    return list(frame.to_dict("index").values())
+
+def economy(soup):
+    """Read the per-player economy statistics from a parsed game page.
+
+    soup: BeautifulSoup document of the Match-Details page.
+    Returns a list of dicts, one per complete row of seven values, keyed by
+    the column names below. Values are strings with commas removed.
+    """
+    columns = ['Food','Wood','Stone','Gold','Trade','Received','Sent']
+    # the economy table is the third width=100% border=0 table on the page
+    stats_table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[2]
+    rows = []
+    current = []
+    # the first seven <center> cells are skipped (presumably the column headings)
+    for cell in stats_table.find_all('center')[7:]:
+        inner = cell.find('div')
+        # prefer the nested <div> when the cell has one
+        holder = inner if inner else cell
+        current.append(holder.contents[0].replace(',',''))
+        if len(current) == 7:
+            rows.append(current)
+            current = []
+    frame = pd.DataFrame(rows,columns = columns)
+    return list(frame.to_dict("index").values())
+
+def tech(soup):
+    """Read the per-player technology statistics from a parsed game page.
+
+    soup: BeautifulSoup document of the Match-Details page.
+    Returns a list of dicts, one per complete row of six values, keyed by
+    the column names below. Values are strings with commas removed.
+    """
+    columns = ['Feudal Time','Castle Time','Imperial Time','Map Explored','Research Count','Research Percentage']
+    # the technology table is the fourth width=100% border=0 table on the page
+    stats_table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[3]
+    rows = []
+    current = []
+    # the first six <center> cells are skipped (presumably the column headings)
+    for cell in stats_table.find_all('center')[6:]:
+        inner = cell.find('div')
+        # prefer the nested <div> when the cell has one
+        holder = inner if inner else cell
+        current.append(holder.contents[0].replace(',',''))
+        if len(current) == 6:
+            rows.append(current)
+            current = []
+    frame = pd.DataFrame(rows,columns = columns)
+    return list(frame.to_dict("index").values())
+
+def society(soup):
+    """Read the per-player society statistics from a parsed game page.
+
+    soup: BeautifulSoup document of the Match-Details page.
+    Returns a list of dicts, one per complete row of five values, keyed by
+    the column names below. Values are strings with commas removed.
+    """
+    columns = ['Total Wonders','Total Castles','Relic Capture','Relic Gold','Villager High']
+    # the society table is the fifth width=100% border=0 table on the page
+    stats_table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[4]
+    rows = []
+    current = []
+    # the first five <center> cells are skipped (presumably the column headings)
+    for cell in stats_table.find_all('center')[5:]:
+        inner = cell.find('div')
+        # prefer the nested <div> when the cell has one
+        holder = inner if inner else cell
+        current.append(holder.contents[0].replace(',',''))
+        if len(current) == 5:
+            rows.append(current)
+            current = []
+    frame = pd.DataFrame(rows,columns = columns)
+    return list(frame.to_dict("index").values())
+
+def combine_orig(match,player,score,military,economy,tech,society):
+    """Nest the per-player statistics into the player records and sort them into the match dict.
+
+    Each player dict receives the entry at the same position of score,
+    military, economy, tech and society under 'Overall', 'Military',
+    'Economy', 'Technology' and 'Society'. Players with Team == 1 are stored
+    in match['Win'] and players with Team == 2 in match['Loss'].
+    Returns the (modified) match dict.
+    """
+    for idx, entry in enumerate(player):
+        entry['Overall'] = score[idx]
+        entry['Military'] = military[idx]
+        entry['Economy'] = economy[idx]
+        entry['Technology'] = tech[idx]
+        entry['Society'] = society[idx]
+    team_one = [entry for entry in player if entry['Team'] == 1]
+    team_two = [entry for entry in player if entry['Team'] == 2]
+    match['Win'] = team_one
+    match['Loss'] = team_two
+    return match
+
+def combine(match,player,score,military,economy,tech,society):
+    """Render one parsed game as CSV text.
+
+    match: dict from match(); the keys 'Match Details', 'Date Played:', 'Map:',
+        'Duration:', 'Players:', 'Game Mod:' and 'Ladder' are read.
+    player, score, military, economy, tech, society: lists of per-player dicts
+        in the same order; entry i of each list describes the same player.
+    Returns a string with one "GAME,..." line followed by one "PLAYER,..."
+    line per entry of player, joined with newlines. Every line ends with a
+    trailing comma. Fields are not quoted or escaped.
+    Raises KeyError/IndexError when an expected key or list entry is missing.
+    """
+    # this is the new combine, really parse_game_page
+    gameid = match["Match Details"].lstrip("#")
+
+    # The date string is interpreted as US/Eastern and reported in UTC. Let the
+    # datetime do the conversion instead of slicing the offset out of str(dto).
+    cal = pdt.Calendar()
+    dto, _ = cal.parseDT(datetimeString=match["Date Played:"], tzinfo=tz("US/Eastern"))
+    dateplayed = dto.astimezone(tz("UTC")).strftime('%Y-%m-%dT%H:%M:%SZ')
+
+    # do not use match rating
+    game_fields = [
+        gameid,
+        dateplayed,
+        match["Map:"],
+        match["Duration:"],
+        match["Players:"],
+        match["Game Mod:"],
+        match["Ladder"],
+    ]
+    game_line = "GAME," + ",".join(game_fields) + ","
+
+    # column order of the statistics part of a PLAYER line
+    score_keys = ("Military Score","Economy Score","Technology Score","Society Score","Total")
+    military_keys = ("Unit Killed","Unit Lost","Building Razed","Building Lost","Units Converted")
+    economy_keys = ("Food","Wood","Stone","Gold","Trade","Received","Sent")
+    tech_keys = ("Feudal Time","Castle Time","Imperial Time","Map Explored","Research Count","Research Percentage")
+    society_keys = ("Total Wonders","Total Castles","Relic Capture","Relic Gold","Villager High")
+
+    # build player csv lines
+    lines = [game_line]
+    for i, tp in enumerate(player):
+        ts = score[i]
+        tm = military[i]
+        te = economy[i]
+        tt = tech[i]
+        tc = society[i]
+        # Name, Color, Clan and Civilization are expected to be strings already
+        # and are deliberately not passed through str(), as before.
+        fields = [
+            str(gameid),
+            str(tp["ID"]),
+            tp["Name"],
+            ts["Color"],
+            tp["Clan"],
+            str(tp["New rating"]),
+            str(tp["Change"]),
+            str(tp["Winbool"]),
+            tp["Civilization"],
+        ]
+        fields.extend(str(ts[k]) for k in score_keys)
+        fields.extend(str(tm[k]) for k in military_keys)
+        fields.extend(str(te[k]) for k in economy_keys)
+        fields.extend(str(tt[k]) for k in tech_keys)
+        fields.extend(str(tc[k]) for k in society_keys)
+        lines.append("PLAYER," + ",".join(fields) + ",")
+
+    return "\n".join(lines)
+
+def parse_game_page(page_text):
+    """Parse the HTML of one Match-Details page into CSV text.
+
+    page_text: the page markup (str or bytes), handed to BeautifulSoup.
+    Returns the result of combine() for a usable page, otherwise one of the
+    strings "invalid page", "[ERROR] has computer player: <matchid>" or
+    "[ERROR] not aoc: <matchid>".
+    """
+    aoc_title = "Age of Empires II: The Conquerors"
+    soup = BeautifulSoup(page_text,"html.parser")
+
+    # Either element may be missing on an unexpected page, so look them up once
+    # and test for None instead of relying on a catch-all except.
+    title = soup.find(name="div",class_="page-title")
+    heading = soup.find("h3")
+    game_name = heading.text if heading is not None else None
+
+    if title is not None and ("Page Not Found" == title.text or aoc_title != game_name):
+        return "invalid page"
+
+    # The match id is taken from the href of the 20th link on the page. A page
+    # without that link cannot be a match page, so report it instead of crashing.
+    try:
+        matchid = soup.find_all("a")[19].get('href').split('/')[3]
+    except (IndexError, AttributeError):
+        return "invalid page"
+
+    # skip this match if it has a computer player
+    for i in soup.find_all("td"):
+        if re.match(r".*\(Computer\).*",i.text):
+            return "[ERROR] has computer player: " + matchid
+
+    # capture only aoc
+    if aoc_title != game_name:
+        return "[ERROR] not aoc: " + matchid
+
+    game = match(soup)
+    play = player(game, soup)
+    sc = score(soup)
+    mil = military(soup)
+    eco = economy(soup)
+    tec = tech(soup)
+    soc = society(soup)
+    return combine(game,play,sc,mil,eco,tec,soc)
+
+# MAIN
+# The account can be overridden with VOOBLY_USERNAME / VOOBLY_PASSWORD so that
+# credentials do not have to live in the source; the fallbacks are the values
+# this script has always used.
+session = login_session(
+    os.environ.get("VOOBLY_USERNAME","brainpinky"),
+    os.environ.get("VOOBLY_PASSWORD","pinkyBrain"),
+)
+
+# 19914658
+
+# single-game mode: handle just --gameid and stop
+if args.gameid:
+    page = get_game_page(session,args.gameid)
+    if args.save:
+        # Print the page text itself. The old code called .encode() on a value
+        # that was already bytes, which raises AttributeError on Python 3.
+        print(page)
+    else:
+        print(parse_game_page(page.encode('latin-1','replace')))
+    sys.exit(0)
+
+# span should be 19914650 to 21260965
+# main loop
+# args.end itself is not fetched: range() stops one before it
+for i in range(args.start,args.end):
+    a = get_game_page(session,i).encode('latin-1','replace')
+    print(parse_game_page(a))
diff --git a/wrap2.sh b/wrap2.sh
new file mode 100755
index 0000000..f0d5dcd
--- /dev/null
+++ b/wrap2.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+# Run vooblystats.py over a range of game numbers and record its output,
+# framed by START/END timestamps, in LOGFILE.
+# ENDVALUE, STARTVALUE and LOGFILE may be preset in the environment; the
+# defaults below are used for any that are empty or unset.
+
+# end value: 21260965
+if test -z "${ENDVALUE}" ; then ENDVALUE=21260965 ; fi
+
+# run1 start value: 19899000
+# run2 start value: 19903845
+if test -z "${STARTVALUE}" ; then STARTVALUE=19903845 ; fi
+
+if test -z "${LOGFILE}" ; then LOGFILE=~/dev/vooblystats/files/run2.csv ; fi
+
+# start the log over with a single empty line
+printf '\n' > "${LOGFILE}"
+{
+   printf "%s %s\n" "START" "$( date -u "+%FT%TZ" )"
+   ./vooblystats.py --start "${STARTVALUE}" --end "${ENDVALUE}"
+   printf "%s %s\n" "END" "$( date -u "+%FT%TZ" )"
+} 2>&1 | tee -a "${LOGFILE}"
diff --git a/wrapper.sh b/wrapper.sh
new file mode 100755
index 0000000..abdfc1c
--- /dev/null
+++ b/wrapper.sh
@@ -0,0 +1,22 @@
+# Print the vooblystats.sh invocations needed to cover the game-number range
+# startgameid..endgameid in chunks of ${span} games, one output file per chunk.
+OUTDIR=~/dev/vooblystats/files
+mkdir -p "${OUTDIR}"
+
+# 18 games per minute, approximately
+# cover this sequence 19871439 21184314
+
+# need to fork vooblystats.sh 152 times!
+
+startgameid=19871439
+endgameid=21184314
+span=8640 # games per 8 hours
+x=0
+count=152
+
+while test $x -lt $count ;
+do
+   # Compute the chunk from the 0-based offset BEFORE incrementing x, so that
+   # the first chunk starts at startgameid instead of one span later.
+   startx=$(( startgameid + x * span ))
+   endx=$(( startx + span - 1 ))
+   # the last chunk must not run past the end of the sequence
+   if test $endx -gt $endgameid ; then endx=$endgameid ; fi
+   # x is 1-based from here on; it numbers the output file
+   x=$(( x + 1 ))
+   echo "call VS_COOKIEFILE=\$(mktemp) vooblystats.sh ${startx} ${endx} > ${OUTDIR}/gameset-${x}.csv"
+   #VS_COOKIEFILE="$(mktemp)" ~/dev/vooblystats/vooblystats.sh ${startx} ${endx} > ${OUTDIR}/gameset-${x}.csv &
+done
bgstack15