#!/usr/bin/env python3
# vim: shiftwidth=4 softtabstop=4 tabstop=4
# File: vooblystats.py
# Author: bgstack15@gmail.com
# Startdate: 2020-02-01 18:55
# Title: Script to Pull Game Data from Voobly
# Purpose: Pull stats from Voobly for data visualization for 30_Turbo_Swag
# History:
# Usage:
# Reference:
#    ripped primarily from https://github.com/nathankong97/voobly-parse-aoe2-game-data
#    date conversion:
#       https://stackoverflow.com/questions/4615250/convert-relative-date-string-to-absolute-date/4615451#4615451
#       https://github.com/bear/parsedatetime
#       https://stackoverflow.com/questions/4770297/convert-utc-datetime-string-to-local-datetime/4770688#4770688
# Improve:
#    add premiumbool? add gamemvp?
#    add player country?
from __future__ import print_function
import argparse, datetime, os, sys, subprocess, re, urllib, requests, pandas as pd
import parsedatetime as pdt
from distutils.spawn import find_executable
from pytz import timezone as tz
sys.path.append("/usr/share/bgscripts/py")
from bgs import debuglev, eprint
from bs4 import BeautifulSoup

# Version string reported by -V/--version.
vooblystatspyversion = "2020-02-02c"

# Module-level defaults

# Today's local date rendered as YYYY-MM-DD.
today = datetime.date.today().strftime("%Y-%m-%d")

# Civilization names in id order: the first entry has id 1.
_civ_names = [
    'Britons', 'Franks', 'Goths', 'Teutons', 'Japanese', 'Chinese', 'Byzantines',
    'Persians', 'Saracens', 'Turks', 'Vikings', 'Mongols', 'Celts', 'Spanish',
    'Aztecs', 'Mayans', 'Huns', 'Koreans', 'Italians',
    'Indians', 'Incas', 'Magyars', 'Slavs', 'Portuguese', 'Ethiopians',
    'Malians', 'Berbers', 'Khmer', 'Malay', 'Burmese', 'Vietnamese',
]

# Lookup from the civ id (as a string) to the civilization name.
# THEORY: when 2 people play the same color, the second player is assigned
# civ_number+(len(civ_numbers)), so ids 32..62 repeat the list with a "2"
# suffix. The second half is derived from the first so the two cannot drift
# apart (the hand-written table had '50' as 'Italians' instead of 'Italians2').
civ_dict = {str(number): name for number, name in enumerate(_civ_names, start=1)}
civ_dict.update(
    (str(number + len(_civ_names)), name + '2')
    for number, name in enumerate(_civ_names, start=1)
)

# Hex colour code (no leading '#') -> player colour name.
color_dict = {
    "0054A6": "blue", "FF0000": "red",
    "FFFF00": "yellow", "00A651": "green",
    "00FFFF": "cyan", "92278F": "purple",
    "C0C0C0": "gray", "FF8000": "orange",
}

# Parse parameters
parser = argparse.ArgumentParser(description="Pull game stats from voobly")
# A bare "-d" (no value) is stored as None; the debug-level code after parsing
# treats that as the maximum level.
parser.add_argument("-d","--debug", nargs='?', default=0, type=int, choices=range(0,11), help="Set debug level.")
# --start/--end are only consumed by the range scan, so they are no longer
# unconditionally required: a single-match run with --gameid never reads them.
parser.add_argument("--start", type=int, help="Set starting game number. Required unless --gameid is given.")
parser.add_argument("--end", type=int, help="Set ending game number. Required unless --gameid is given.")
parser.add_argument("--gameid", type=int, help="Fetch data for a specific matchid")
parser.add_argument("--save", action='store_true', help="Show page text instead of parsing")
parser.add_argument("-V","--version", action="version", version="%(prog)s " + vooblystatspyversion)

args = parser.parse_args()

# Uses the same truthiness test as the main script's "if args.gameid:" so the
# range scan can never start with a missing bound.
if not args.gameid and (args.start is None or args.end is None):
    parser.error("the following arguments are required unless --gameid is given: --start, --end")

# Effective debug level: a bare "-d" arrives as None and means level 10;
# otherwise use the parsed integer (0 when the flag is absent or zero).
debuglevel = 10 if args.debug is None else (args.debug or 0)

#if debuglev(10,debuglevel): print(searchstring)

# Determine filename
#thisfile = fileprefix + "." + aori + "." + today + ".log"
#if debuglev(5,debuglevel): eprint("Using file " + thisfile)

# Ensure the ~/.dli directory exists
#if not os.path.exists(outdir):
#   os.makedirs(outdir)

def login_session(username, password):
    """Open a requests session and submit the voobly login form.

    The login page is fetched first, then the credentials are posted to the
    auth endpoint on the same session so that any cookies set along the way
    are kept on it.

    The session is deliberately NOT created in a ``with`` block: leaving a
    ``with requests.Session()`` block closes the session, and the caller is
    expected to keep using the returned object.

    NOTE(review): neither response is inspected, so a rejected login is not
    detected here.

    Args:
        username: voobly account name.
        password: voobly account password.

    Returns:
        The open ``requests.Session``; the caller owns it.
    """
    session = requests.Session()
    try:
        session.get('https://www.voobly.com/login')
        form = {'username': username, 'password': password}
        session.post('https://www.voobly.com/login/auth', data=form)
    except Exception:
        # Do not leak the connection pool when the login round-trip fails.
        session.close()
        raise
    return session

def get_game_page(session, gameid):
    """Fetch the Match-Details page for ``gameid`` and return the response text.

    Args:
        session: object with a ``get(url)`` method whose result has ``.text``.
        gameid: match number; it is rendered with ``str()`` into the URL.
    """
    url = "https://www.voobly.com/match/view/{0}/Match-Details".format(str(gameid))
    response = session.get(url)
    return response.text

def match(soup):
    """Build the match-level dict from a parsed match page.

    The first table inside the first ``<td width="50%" valign="top">`` is read
    row by row; each row's cell texts become one key/value pair, and rows
    consisting of a single empty cell are dropped. Every remaining row must
    therefore have exactly two cells, otherwise ``dict()`` raises.

    Args:
        soup: parsed page (BeautifulSoup-style ``find_all`` interface).

    Returns:
        dict of the table's key/value pairs plus empty ``'Win'`` and ``'Loss'``
        lists, and ``'Ladder'`` set to the text of the last link whose href
        contains ``"ladder/"``. ``'Ladder'`` is absent when no such link exists.
    """
    table = soup.find_all(name='td',attrs={'width':'50%','valign': 'top'})[0].find_all('table')[0]
    table_data = [[cell.text for cell in row("td")]
                            for row in table("tr")]
    table_data = [x for x in table_data if x != ['']]

    match_dict = dict(table_data)
    match_dict['Win'] = []
    match_dict['Loss'] = []

    for link in soup.find_all('a'):
        # Anchors without an href yield None, which cannot be searched.
        href = link.get('href')
        if href and "ladder/" in href:
            match_dict['Ladder'] = link.text

    return match_dict

def player(match, soup):
    """Collect the per-player rows of a match page.

    Each row is built up positionally, in the order of the header list below,
    by several independent passes over the page. Rows are finally converted
    through a pandas DataFrame into a list of dicts keyed by the header names.

    Args:
        match: match dict; ``match['Players:']`` must be int-convertible.
        soup: parsed page (BeautifulSoup-style interface).

    Returns:
        list of dicts, one per profile link found in the first
        ``width=100% border=0`` table. The last five columns ('Overall' ..
        'Society') are filled with empty-list placeholders.
    """
    rows = [['ID','Name','Clan','New rating','Winbool','Change','Civilization','Team','Overall','Military','Economy','Technology','Society']]
    player_num = int(match['Players:'])
    table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[0]
    key = 'https://voobly.com/profile/'

    # Profile links drive both the ID pass and the clan pass, so find them
    # once. Anchors without an href are skipped instead of raising TypeError.
    profile_links = [a for a in table.find_all('a') if key in (a.get('href') or '')]

    # ID: every digit in the profile URL, concatenated.
    for link in profile_links:
        num = int(''.join(re.findall('[0-9]', link.get('href'))))
        rows.append([num])

    # Name: taken from the alt text of images, because the link text is not
    # always sufficient (see gameid 20365401, Tic@voobly).
    counts = 1
    seen_images = 0
    found_names = []
    for cell in soup.find_all('td'):
        for img in cell.find_all('img'):
            seen_images += 1
            # always need to skip the first two images with alt text
            if seen_images > 2:
                for attr in re.findall("alt=\"[^\"]+\"", str(img)):
                    name = attr.split('"')[1]
                    if "Age of Empires II: The Conquerors" != name and counts < min(9, player_num + 1) and name not in found_names:
                        rows[counts].append(name)
                        found_names.append(name)
                        counts += 1

    # Clan: the element just before the profile link, when it looks like
    # "[TAG]"; otherwise an empty string.
    counts = 1
    for link in profile_links:
        previous = link.previous_element
        try:
            is_clan = re.match(r"^\[.*]$", previous)
        except TypeError:
            # previous element is not a string (or is missing)
            is_clan = None
        rows[counts].append(previous if is_clan else "")
        counts += 1

    # source error can happen where the page lists 50 players!
    if player_num > (counts-1):
        print("[WARNING] readjusting player_num from",player_num,"to ",(counts-1))
        player_num = (counts-1)

    # New rating / Winbool / Change: spans holding exactly three <b> elements.
    counts = 1
    for span in soup.find_all('span'):
        bolds = span.find_all("b")
        if len(bolds) != 3:
            continue
        if 0 < int(bolds[1].text):
            # for left column,  0=new rating, 1=points, 2=team
            rows[counts].append(bolds[0].text)
            # team number is basically a boolean for "didwin"
            rows[counts].append(bolds[2].text == "1")
        else:
            # for right column, 0=team, 1=points, 2=new rating
            rows[counts].append(bolds[2].text)
            rows[counts].append(bolds[0].text == "1")
        # always include points
        rows[counts].append(bolds[1].text)
        counts += 1
        if counts >= 9:
            break

    # Civilization: digits of the civ icon URL looked up in civ_dict.
    counts = 1
    key = '/res/games/AOC/civs/'
    for img in soup.find_all('img'):
        src = img.get('src') or ''  # images without src are skipped
        if key in src:
            civ = str(''.join(ch for ch in src if ch.isdigit()))
            rows[counts].append(civ_dict[civ])
            counts += 1

    # Team: first half of the players is team 1, the rest team 2.
    for i in range(player_num):
        rows[i + 1].append(2 if i >= (player_num/2) else 1)

    # Placeholders for Overall, Military, Economy, Technology, Society.
    for i in range(player_num):
        rows[i + 1].extend([[], [], [], [], []])

    df = pd.DataFrame(rows[1:],columns=rows[0])
    player_dict = df.to_dict("index")
    player_dict = list(player_dict.values())
    return player_dict

def score(soup):
    """Parse the first ``width=100% border=0`` table into per-player score rows.

    The first five <center> elements of the table are skipped; the remaining
    ones are consumed five at a time, one group per row. Each row is prefixed
    with a colour name looked up in ``color_dict`` (or "nocolor" when the lookup
    fails), giving the six columns of the header below.

    Returns:
        list of dicts keyed by 'Color', 'Military Score', 'Economy Score',
        'Technology Score', 'Society Score' and 'Total'. Numeric cells are
        strings with thousands separators removed.
    """
    score = [['Color','Military Score','Economy Score','Technology Score','Society Score','Total']]
    lst = []  # row currently being assembled
    count = 0  # numeric cells collected for the current row
    playercount = 1  # incremented per finished row; not read anywhere in this function
    table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[0]

    x=0  # 1-based index of the current <center> cell
    hascolor=0  # 1 once the colour for the current row has been handled
    for i in table.find_all('center')[5:]:
        x += 1

        # player color. This is very inefficient but it works, so I'm stopping. Because the main loop in this function is dependent on table.find_all('center'), but I have to loop over a different set of things, this needs some crazy setup.
        # int((x-1)/5)+1 is the 1-based row number of this cell; the loop looks
        # for the div with empty text at that ordinal position.
        # NOTE(review): assumes the n-th empty-text div belongs to the n-th player row - confirm.
        counts=0
        for j in table.find_all(name='div',string=''):
            if "" == j.text:
                counts += 1
                if counts == int((x-1)/5)+1:
                    color=""
                    #print(j.attrs["style"].split())
                    try:
                        # second whitespace-separated token of the style attribute,
                        # without leading '#' and trailing ';', used as color_dict key
                        color = str(color_dict[j.attrs["style"].split()[1].lstrip("#").rstrip(";")])
                        if "padding:" == color: color = "nocolor"
                    except:
                        # missing style, too few tokens, or unknown colour code
                        color = "nocolor"
                    if hascolor != 1:
                        lst.append(color)
                        hascolor = 1
                    break
                if hascolor == 1:
                    break
            if hascolor == 1:
                break

        # From the second cell of a row onwards hascolor is already 1, so the
        # loop above exits after looking at a single div.
        hascolor = 1
     
        # The value is the first child of a nested <div> when there is one,
        # otherwise the first child of the <center> itself.
        if i.find('div'):
            #print(i.find('div').contents[0].replace(',',''))
            num = i.find('div').contents[0].replace(',','')
            lst.append(num)
            count += 1
        else:
            #print(i.contents[0].replace(',',''))
            num = i.contents[0].replace(',','')
            lst.append(num)
            count += 1

        # Five numeric cells complete a row: store it and reset the state.
        if count == 5:
            score.append(lst)
            lst = []
            count = 0
            hascolor=0
            playercount += 1
    df = pd.DataFrame(score[1:],columns = score[0])
    score_dict = df.to_dict("index")
    score_dict = list(score_dict.values())
    return score_dict

def _stat_rows(soup, table_index, columns):
    """Read one statistics table of a match page into a list of row dicts.

    The four statistics parsers below differ only in which
    ``width=100% border=0`` table they read and in their column names, so the
    shared walk lives here.

    The table's <center> elements are taken in order. The first
    ``len(columns)`` of them are skipped, and the rest are grouped into rows of
    ``len(columns)`` values. A trailing incomplete group is discarded.

    Args:
        soup: parsed page (BeautifulSoup-style interface).
        table_index: position of the table among the matching tables.
        columns: list of column names; its length is the row width.

    Returns:
        list of dicts mapping column name to the cell string with commas
        removed.
    """
    table = soup.find_all(name='table',attrs={'width':'100%','border': '0'})[table_index]
    width = len(columns)
    rows = []
    current = []
    for cell in table.find_all('center')[width:]:
        # The value is the first child of a nested <div> when there is one,
        # otherwise the first child of the <center> itself.
        inner = cell.find('div')
        holder = inner if inner else cell
        current.append(holder.contents[0].replace(',',''))
        if len(current) == width:
            rows.append(current)
            current = []
    df = pd.DataFrame(rows, columns=columns)
    return list(df.to_dict("index").values())

def military(soup):
    """Return the per-player military statistics (second statistics table)."""
    return _stat_rows(soup, 1, ['Unit Killed','Unit Lost','Building Razed','Building Lost','Units Converted'])

def economy(soup):
    """Return the per-player economy statistics (third statistics table)."""
    return _stat_rows(soup, 2, ['Food','Wood','Stone','Gold','Trade','Received','Sent'])

def tech(soup):
    """Return the per-player technology statistics (fourth statistics table)."""
    return _stat_rows(soup, 3, ['Feudal Time','Castle Time','Imperial Time','Map Explored','Research Count','Research Percentage'])

def society(soup):
    """Return the per-player society statistics (fifth statistics table)."""
    return _stat_rows(soup, 4, ['Total Wonders','Total Castles','Relic Capture','Relic Gold','Villager High'])

def combine_orig(match,player,score,military,economy,tech,society):
    """Attach the per-player statistics to each player and split them by team.

    Every player dict receives its 'Overall', 'Military', 'Economy',
    'Technology' and 'Society' entry from the list element at the same index.
    Players whose 'Team' is 1 go into ``match['Win']``, those with 2 into
    ``match['Loss']``. ``match`` is modified in place and returned.
    """
    sections = (
        ('Overall', score),
        ('Military', military),
        ('Economy', economy),
        ('Technology', tech),
        ('Society', society),
    )
    for idx, entry in enumerate(player):
        for label, stats in sections:
            entry[label] = stats[idx]

    team_one = []
    team_two = []
    for entry in player:
        if entry['Team'] == 1:
            team_one.append(entry)
    for entry in player:
        if entry['Team'] == 2:
            team_two.append(entry)

    match['Win'] = team_one
    match['Loss'] = team_two
    return match

def combine(match,player,score,military,economy,tech,society):
    """Render one match as text: a GAME line followed by one PLAYER line per player.

    Fields are joined with commas and every line ends with a trailing comma.
    Values are written as-is, without CSV quoting or escaping.

    Args:
        match: match dict with the keys 'Match Details', 'Date Played:',
            'Map:', 'Duration:', 'Players:' and 'Game Mod:' (all strings).
            'Ladder' is optional and rendered as an empty field when absent.
        player, score, military, economy, tech, society: lists of per-player
            dicts; element i of each list describes the same player.

    Returns:
        The GAME line and the PLAYER lines joined with newlines.
    """
    gameid = match["Match Details"].lstrip("#")

    # The page's date string is parsed by parsedatetime, interpreted as
    # US/Eastern, and then converted to UTC with astimezone() instead of
    # slicing the offset out of str(datetime).
    cal = pdt.Calendar()
    dto, _ = cal.parseDT(datetimeString=match["Date Played:"], tzinfo=tz("US/Eastern"))
    dateplayed = dto.astimezone(tz("UTC")).strftime('%Y-%m-%dT%H:%M:%SZ')

    # do not use match rating
    game_fields = [
        gameid,
        dateplayed,
        match["Map:"],
        match["Duration:"],
        match["Players:"],
        match["Game Mod:"],
        # 'Ladder' is only present when the page had a ladder link.
        match.get("Ladder", ""),
    ]
    lines = ["GAME," + ",".join(game_fields) + ","]

    score_keys = ("Military Score", "Economy Score", "Technology Score", "Society Score", "Total")
    military_keys = ("Unit Killed", "Unit Lost", "Building Razed", "Building Lost", "Units Converted")
    economy_keys = ("Food", "Wood", "Stone", "Gold", "Trade", "Received", "Sent")
    tech_keys = ("Feudal Time", "Castle Time", "Imperial Time", "Map Explored", "Research Count", "Research Percentage")
    society_keys = ("Total Wonders", "Total Castles", "Relic Capture", "Relic Gold", "Villager High")

    for i, tp in enumerate(player):
        ts = score[i]
        tm = military[i]
        te = economy[i]
        tt = tech[i]
        tc = society[i]
        values = [
            gameid,
            tp["ID"],
            tp["Name"],
            ts["Color"],
            tp["Clan"],
            tp["New rating"],
            tp["Change"],
            tp["Winbool"],
            tp["Civilization"],
        ]
        values.extend(ts[k] for k in score_keys)
        values.extend(tm[k] for k in military_keys)
        values.extend(te[k] for k in economy_keys)
        values.extend(tt[k] for k in tech_keys)
        values.extend(tc[k] for k in society_keys)
        lines.append("PLAYER," + ",".join(str(v) for v in values) + ",")

    return "\n".join(lines)

def parse_game_page(page_text):
    """Parse one match page and return its text rendering or a status string.

    Args:
        page_text: page markup (str or bytes) handed to BeautifulSoup.

    Returns:
        - "invalid page" when a page-title div exists and either reads
          "Page Not Found" or the page's <h3> is not the AoC title;
        - "[ERROR] has computer player: <matchid>" when a table cell's text
          matches "(Computer)";
        - "[ERROR] not aoc: <matchid>" when the <h3> is missing or is not the
          AoC title;
        - otherwise the result of combine() for the parsed match.
    """
    aoc_title = "Age of Empires II: The Conquerors"
    soup = BeautifulSoup(page_text,"html.parser")

    # Explicit None checks replace the former bare try/except: a page without
    # a page-title div simply skips this early validation.
    title_div = soup.find(name="div",class_="page-title")
    heading = soup.find("h3")
    if title_div is not None:
        if "Page Not Found" == title_div.text:
            return "invalid page"
        if heading is not None and aoc_title != heading.text:
            return "invalid page"

    # NOTE(review): relies on the 20th anchor of the page carrying the match
    # id as the 4th '/'-separated piece of its href - confirm against the
    # page layout.
    matchid = soup.find_all("a")[19].get('href').split('/')[3]

    # skip this match if it has a computer player
    computer_re = re.compile(r".*\(Computer\).*")
    for cell in soup.find_all("td"):
        if computer_re.match(cell.text):
            return "[ERROR] has computer player: " + matchid

    # capture only aoc; a page without any <h3> is reported the same way
    # instead of raising AttributeError
    if heading is None or aoc_title != heading.text:
        return "[ERROR] not aoc: " + matchid

    game = match(soup)
    play = player(game, soup)
    sc = score(soup)
    mil = military(soup)
    eco = economy(soup)
    tec = tech(soup)
    soc = society(soup)
    return combine(game,play,sc,mil,eco,tec,soc)

# MAIN
# NOTE(review): the account credentials are committed in this file. They can
# be overridden with the VOOBLY_USERNAME / VOOBLY_PASSWORD environment
# variables; the literals remain only as the backward-compatible fallback.
session = login_session(
    os.environ.get("VOOBLY_USERNAME", "brainpinky"),
    os.environ.get("VOOBLY_PASSWORD", "pinkyBrain"),
)

# 19914658

# Single-match mode: fetch one page, then either dump it or parse it.
if args.gameid:
    page_text = get_game_page(session,args.gameid)
    if args.save:
        # Print the page text itself. The previous code called .encode() on
        # an already-encoded bytes object, which raises AttributeError on
        # Python 3.
        print(page_text)
    else:
        print(parse_game_page(page_text.encode('latin-1','replace')))
    sys.exit(0)

# span should be 19914650 to 21260965
# main loop; args.end itself is not fetched (range() excludes its upper bound)
for i in range(args.start,args.end):
    a = get_game_page(session,i).encode('latin-1','replace')
    print(parse_game_page(a))