from datetime import date, timedelta
from re import match
from typing import List
import numpy as np
from numpy.core.fromnumeric import mean
from pandas._libs.tslibs.timestamps import Timestamp
from pandas.tseries.offsets import Second
from common.Def import *
from common.CommonFuc import *
import pandas as pd

# Mainland ids used as the per-state filter by CalcMainlandStateOnline and
# CalcFuc.  NOTE(review): these look like the last stage of each chapter --
# confirm against the game configuration.
_FINISH_MAIN_IDS = [
    10112, 10220, 10324, 10428, 10532, 10636, 10740, 10840, 10940, 11040,
    11140, 11240, 11340, 11440, 11540, 11640, 11740, 11840, 11940, 12040,
]

_MAINLINE_COLUMNS = ["logtime", "timeStamp", "uid", "mainlandId"]


def _iter_days(date, day):
    """Yield ``(curdatetime, curdate)`` for offsets ``0 .. day - 1`` from ``date``.

    ``curdatetime`` is ``DateToTime(date, offset)`` and ``curdate`` is
    ``TimeToDate(curdatetime)``; both helpers come from the project's common
    modules (presumably "date plus offset days" and its date-string form).
    """
    for offset in range(0, day):
        curdatetime = DateToTime(date, offset)
        yield curdatetime, TimeToDate(curdatetime)


def _concat_or_empty(frames, columns=None):
    """Concatenate ``frames``; return an empty frame instead of raising on ``[]``."""
    if frames:
        return pd.concat(frames)
    return pd.DataFrame(columns=columns)


def _online_time_at(login_frames, uid, time_stamp):
    """Return the accumulated online time of ``uid`` at ``time_stamp``.

    ``login_frames`` is searched in order (current day first, then previous
    day).  The last login row with ``reonline == 0`` at or before
    ``time_stamp`` supplies ``onlineTime``; the seconds elapsed since that
    login are added.  Returns 0 when no such login exists.
    """
    for logins in login_frames:
        if logins is None:
            continue
        rows = logins[
            (logins["uid"] == uid)
            & (logins["reonline"] == 0)
            & (logins["timeStamp"] <= time_stamp)
        ]
        if len(rows) > 0:
            last = rows.iloc[-1]
            return last["onlineTime"] + time_stamp - last["timeStamp"]
    return 0


def _load_mainline_finishes(date, day):
    """Collect mainline finish rows (``isWin == 0``) with an ``onlinetime`` column.

    Stops at the first day that has no FinishMainland log.
    """
    frames = []
    for curdatetime, curdate in _iter_days(date, day):
        curdata = GetCsvData(LogType.Bill, BillType.FinishMainland.name, curdate)
        if curdata is None:
            break
        lastdate = GetDateNext(curdatetime, -1)
        login_frames = [
            GetCsvData(LogType.Bill, BillType.Login.name, curdate),
            GetCsvData(LogType.Bill, BillType.Login.name, lastdate),
        ]
        curdata = curdata[
            (curdata["mainlandType"] == MainlandType.Mainline)
            & (curdata["isWin"] == 0)
        ]
        # Copy so the column assignment below does not write into a view.
        curdata = curdata[_MAINLINE_COLUMNS].copy()
        # A list comprehension (rather than apply(axis=1)) also works when the
        # filtered frame is empty.
        curdata["onlinetime"] = [
            _online_time_at(login_frames, uid, stamp)
            for uid, stamp in zip(curdata["uid"], curdata["timeStamp"])
        ]
        frames.append(curdata)
    return _concat_or_empty(frames, columns=_MAINLINE_COLUMNS + ["onlinetime"])


# Analyze online time spent on the mainline.
def CalcMainlineFinishOnline(date, day):
    """Return per-player mainline finish rows annotated with online-time stats.

    For each uid the rows get ``onlinetime_diff`` (online time since the
    previous finish row, 0 for the first), ``state``
    (``mainlandId % 10000 // 100``) and, per state, ``onlinetime_avg`` /
    ``onlinetime_state`` (mean / sum of ``onlinetime_diff``).

    Returns an empty frame with the same columns when no data is available.
    """
    mainlandData = _load_mainline_finishes(date, day)
    merged = []
    for _uid, group in mainlandData.groupby("uid"):
        group = group.copy()
        group["onlinetime_diff"] = group["onlinetime"].diff().fillna(0)
        group["state"] = (group["mainlandId"].values % 10000) // 100
        per_state = (
            group.groupby(by=["state"])["onlinetime_diff"]
            .agg([("onlinetime_avg", "mean"), ("onlinetime_state", "sum")])
            .reset_index()
        )
        merged.append(pd.merge(group, per_state, on="state"))
    return _concat_or_empty(
        merged,
        columns=_MAINLINE_COLUMNS
        + ["onlinetime", "onlinetime_diff", "state", "onlinetime_avg", "onlinetime_state"],
    )


# Filter bill data of a given player.
def CalcPlayerBillData(uid, billtype, date, day):
    """Return the bill rows of ``billtype`` that belong to ``uid``.

    For ``BillType.MailOpBegin`` a row matches when ``uid`` is either
    ``opUid`` or ``ownerUid``; otherwise the ``uid`` column is matched.
    Days without a log file are skipped; an empty frame is returned when
    nothing was found.
    """
    billtype = BillType(billtype)
    frames = []
    for _, curdate in _iter_days(date, day):
        curdata = GetCsvData(LogType.Bill, billtype.name, curdate)
        if curdata is None:
            continue
        if billtype == BillType.MailOpBegin:
            mask = (curdata["opUid"] == uid) | (curdata["ownerUid"] == uid)
        else:
            mask = curdata["uid"] == uid
        frames.append(curdata[mask])
    return _concat_or_empty(frames)


# Filter bdc data of a given player.
def CalcPlayerBdcData(uid, bdctype, date, day):
    """Return the bdc rows of ``bdctype`` whose ``role_id`` equals ``uid``.

    Days without a log file are skipped; an empty frame is returned when
    nothing was found.
    """
    bdctype = BdcType(bdctype)
    frames = []
    for _, curdate in _iter_days(date, day):
        curdata = GetCsvData(LogType.Bdc, bdctype.name, curdate)
        if curdata is None:
            continue
        frames.append(curdata[curdata["role_id"] == uid])
    return _concat_or_empty(frames)


# Compute paying users.
def GetPayPlayer(date, day):
    """Return the unique uids with a PayGoogleSuccessRes row (``ret == 0``, ``payStatus == 2``).

    Returns ``[]`` as soon as any day in the range has no log file
    (existing behavior, kept as is).
    """
    frames = []
    for _, curdate in _iter_days(date, day):
        csvdata = GetCsvData(LogType.Bill, BillType.PayGoogleSuccessRes.name, curdate)
        if csvdata is None:
            return []
        frames.append(csvdata[(csvdata["ret"] == 0) & (csvdata["payStatus"] == 2)])
    if not frames:
        return []
    data = pd.concat(frames)
    data = data.drop_duplicates(subset=["uid"], keep="last", inplace=False)
    return data["uid"].tolist()


# Get users who created a role.
def GetCreateRole(date, day = 1, server_id = 0):
    """Return the ``role_id`` values of CreateRole rows in the date range.

    When ``server_id > 0`` only rows of that server are kept.  Returns ``[]``
    as soon as any day in the range has no log file (existing behavior).
    """
    frames = []
    for _, curdate in _iter_days(date, day):
        csvdata = GetCsvData(LogType.Bdc, BdcType.CreateRole.name, curdate)
        if csvdata is None:
            return []
        if server_id > 0:
            csvdata = csvdata[csvdata["server_id"] == server_id]
        frames.append(csvdata)
    if not frames:
        return []
    return pd.concat(frames)["role_id"].tolist()


# Get the online time of each player's first session.
def CalcFistOnlineTime(date, day):
    """Return Logout rows with ``loginCount == 1`` for the date range.

    The result has the columns ``logtime``, ``timeStamp``, ``uid`` and
    ``onlineTime``.  Days without a log file are skipped.
    """
    columns = ["logtime", "timeStamp", "uid", "onlineTime"]
    frames = []
    for _, curdate in _iter_days(date, day):
        csvdata = GetCsvData(LogType.Bill, BillType.Logout.name, curdate)
        if csvdata is None:
            continue
        frames.append(csvdata[csvdata["loginCount"] == 1][columns])
    return _concat_or_empty(frames, columns=columns)


def GetFirstDayOnlineTime(date):
    """Return ``uid`` / ``onlineTime`` for every role created on ``date``.

    ``onlineTime`` is the ``onlineTime`` of the player's last logout of the
    day, plus -- when a login follows that logout -- the seconds from that
    login to ``DateToTime(date, 1)``.
    """
    createRole = GetCreateRole(date)
    loginData = GetCsvData(LogType.Bill, BillType.Login.name, date)
    loginData = loginData[loginData["uid"].isin(createRole)]
    logoutData = GetCsvData(LogType.Bill, BillType.Logout.name, date)
    logoutData = logoutData[logoutData["uid"].isin(createRole)]
    end_stamp = int(DateToTime(date, 1).timestamp())

    rows = []
    for uid in createRole:
        logouts = logoutData[logoutData["uid"] == uid]
        last_logout = logouts.iloc[-1] if len(logouts) > 0 else None
        last_logout_stamp = 0 if last_logout is None else last_logout["timeStamp"]
        online = 0 if last_logout is None else last_logout["onlineTime"]

        # A login after the last logout means the session ran past the log
        # window; count it up to end_stamp.
        logins = loginData[
            (loginData["uid"] == uid) & (loginData["timeStamp"] > last_logout_stamp)
        ]
        if len(logins) > 0:
            login_stamp = logins.iloc[0]["timeStamp"]
            if login_stamp > 0:
                online = online + end_stamp - login_stamp
        rows.append({"uid": uid, "onlineTime": online})
    # Explicit columns keep the frame indexable when there are no new roles.
    return pd.DataFrame(rows, columns=["uid", "onlineTime"])


def CalcDungeon(date, day):
    """Return ``(totalData, data)`` dungeon timing statistics.

    ``data`` has one row per player and day (see ``CalcDungeonTime``).
    ``totalData`` has one row per ``TodayLayer`` with the player count, the
    total cost time and the mean cost time of the rows between the 20th and
    80th percentile positions.
    """
    # Collect enter / completion records.
    records = []
    for _, curdate in _iter_days(date, day):
        records.extend(CalcDungeonTime(curdate))
    data = pd.DataFrame(records)

    totalData = []
    if not data.empty:
        for layer, group in data.groupby("TodayLayer"):
            # sort_values returns a new frame; it must be assigned for the
            # 20%-80% slice to trim the extremes.
            group = group.sort_values("costtime")
            p20 = int(len(group) * 0.2)
            p80 = int(len(group) * 0.8)
            center = group.iloc[p20:p80]
            totalData.append({
                "layer": layer,
                "num": len(group),
                "total": group["costtime"].sum(),
                "平均耗时": center["costtime"].mean(),
            })
    return pd.DataFrame(totalData), data


def _first_stamp_between(frame, low, high):
    """Return the first ``timeStamp`` in ``frame`` strictly between ``low`` and ``high``, else None."""
    if frame is None:
        return None
    stamps = frame.loc[
        (frame["timeStamp"] > low) & (frame["timeStamp"] < high), "timeStamp"
    ]
    if len(stamps) > 0:
        return stamps.iloc[0]
    return None


def CalcDungeonTime(date):
    """Return per-player dungeon cost-time records for ``date``.

    Only players with a ``DungeonLayer == 999`` row that day are analysed.
    The cost time is the span between the first and last considered
    DungeonOp rows, minus the time between the first dungeon exit (or, when
    there is none, logout) found between two consecutive rows and the later
    of those rows.

    Returns a list of dicts with ``uid``, ``date``, ``TodayLayer`` and
    ``costtime``.
    """
    enterData = GetCsvDataShift(LogType.Bill, BillType.DungeonOp.name, date)
    if enterData is None:
        return []
    enterData = enterData[(enterData["isEnter"] == 1) | (enterData["DungeonLayer"] == 999)]
    exitData = GetCsvDataShift(LogType.Bdc, BdcType.DungeonExit.name, date)
    logoutData = GetCsvDataShift(LogType.Bill, BillType.Logout.name, date)

    datalist = []
    for uid, group in enterData.groupby("uid"):
        comple = group[group["DungeonLayer"] == 999]
        if len(comple) == 0:
            # Not completed that day: skip the analysis.
            continue

        subexitData = None if exitData is None else exitData[exitData["role_id"] == uid]
        sublogoutData = None if logoutData is None else logoutData[logoutData["uid"] == uid]

        subtime = 0
        timeList = group["timeStamp"].tolist()
        for previous, current in zip(timeList, timeList[1:]):
            left_at = _first_stamp_between(subexitData, previous, current)
            if left_at is None:
                # Fall back to a logout in the same interval (the original
                # re-tested the exit rows here, so logouts were never used).
                left_at = _first_stamp_between(sublogoutData, previous, current)
            if left_at is not None:
                subtime = subtime + current - left_at
                # NOTE(review): only the first interruption is subtracted --
                # confirm whether later ones should be accumulated too.
                break

        elapsed = group["timeStamp"].diff().fillna(0).sum()
        datalist.append({
            "uid": uid,
            "date": date,
            "TodayLayer": comple["TodayLayer"].tolist()[0],
            "costtime": elapsed - subtime,
        })
    return datalist


def GetLoginPlayer(date, isShift = False):
    """Return the unique uids found in the Login log of ``date`` (``[]`` if missing).

    ``isShift`` is accepted for interface compatibility and is not used.
    """
    data = GetCsvData(LogType.Bill, BillType.Login.name, date)
    if data is None:
        return []
    data = data.drop_duplicates(subset=["uid"], keep="last", inplace=False)
    return data["uid"].tolist()


# Get next-day retained players.
def CalcSecondPlayer(date):
    """Split the roles created on ``date`` by whether they logged in the next day.

    Returns ``(second, nosecond)``: the created roles that appear / do not
    appear in the next day's login list.
    """
    created = set(GetCreateRole(date, 1))
    logged_in = set(GetLoginPlayer(GetDateStrNext(date, 1)))
    return list(created.intersection(logged_in)), list(created.difference(logged_in))


def _retention_summary(label, uids, firstOnline, hangupTimeData, taskBoxData):
    """Build one row of the stay/leave summary written by ``CalcHangupTime``."""
    count = len(uids)
    total_online = firstOnline[firstOnline["uid"].isin(uids)]["onlineTime"].sum()
    hangup_total = hangupTimeData["time"].sum()
    # With no players both totals are 0; report NaN for the averages.
    nan = float("nan")
    return {
        "type": label,
        "count": count,
        "total_online": total_online,
        "avg_online": total_online / count if count else nan,
        "avg_hanguptime": hangup_total / count if count else nan,
        "takeBox": taskBoxData[taskBoxData["uid"].isin(uids)]["istask"].sum(),
    }


def CalcHangupTime(date, day):
    """Compare hang-up reward counts of retained and non-retained new players.

    For every day in the range the GetMoney rows with ``money_type == 7`` are
    de-duplicated on (``role_id``, ``reason_info``) and counted per role.
    The counts are split by next-day retention and written, together with a
    stay/leave summary and the first-day online time, to CSV files under
    ``./out``.  Days without a GetMoney log are reported and skipped.

    Returns ``(secondhangupTimeData, nosecondListhangupTimeData)``.
    """
    hangupList = []
    secondList = []
    nosecondList = []
    for _, curdate in _iter_days(date, day):
        getmonyData = GetCsvData(LogType.Bdc, BdcType.GetMoney.name, curdate)
        if getmonyData is None:
            print(f"getmonyData {curdate} none")
            continue
        getmonyData = getmonyData[getmonyData["money_type"] == 7]
        getmonyData = getmonyData.drop_duplicates(
            subset=["role_id", "reason_info"], keep="last", inplace=False
        )
        hangupList.append(getmonyData)
        second, nosecond = CalcSecondPlayer(curdate)
        secondList = secondList + second
        nosecondList = nosecondList + nosecond

    hangupData = _concat_or_empty(hangupList, columns=["role_id"])
    counts = hangupData.groupby("role_id").size()
    hangupTimeData = pd.DataFrame({"uid": counts.index, "time": counts.values})
    secondhangupTimeData = hangupTimeData[hangupTimeData["uid"].isin(secondList)]
    nosecondListhangupTimeData = hangupTimeData[hangupTimeData["uid"].isin(nosecondList)]

    firstOnline = GetFirstDayOnlineTime(date)
    taskBoxData = CalcFinish15Box(date)
    firstOnline["online(mine)"] = firstOnline["onlineTime"] / 60

    total = pd.DataFrame([
        _retention_summary("stay", secondList, firstOnline, secondhangupTimeData, taskBoxData),
        # The "leave" row must use the non-retained players' hang-up counts.
        _retention_summary("leave", nosecondList, firstOnline, nosecondListhangupTimeData, taskBoxData),
    ])
    total.to_csv(f"./out/total.csv", index=False)
    firstOnline.to_csv(f"./out/firstOnline.csv", index=False)
    secondhangupTimeData.to_csv(f"./out/secondhangupTimeData.csv", index=False)
    nosecondListhangupTimeData.to_csv(f"./out/nosecondListhangupTimeData.csv", index=False)
    return secondhangupTimeData, nosecondListhangupTimeData


def CalcFinish15Box(date):
    """Return ``uid`` / ``istask`` for every role created on ``date``.

    ``istask`` is 1 when the player has a FinishQuest row for task 501 and
    either has no winning (``isWin == 0``) FinishMainland row for mainland
    10106 or finished the task no later than that row; otherwise 0.
    """
    createRole = GetCreateRole(date)
    taskData = GetCsvData(LogType.Bdc, BdcType.FinishQuest.name, date)
    taskData = taskData[(taskData["task_id"] == 501) & (taskData["role_id"].isin(createRole))]
    finishMainland = GetCsvData(LogType.Bill, BillType.FinishMainland.name, date)
    finishMainland = finishMainland[
        (finishMainland["mainlandId"] == 10106)
        & (finishMainland["isWin"] == 0)
        & (finishMainland["uid"].isin(createRole))
    ]

    rows = []
    for uid in createRole:
        tasks = taskData[taskData["role_id"] == uid]
        wins = finishMainland[finishMainland["uid"] == uid]
        istask = 0
        if len(tasks) > 0:
            if len(wins) == 0 or tasks.iloc[0].timeStamp <= wins.iloc[0].timeStamp:
                istask = 1
        rows.append({"uid": uid, "istask": istask})
    return pd.DataFrame(rows, columns=["uid", "istask"])


def CalcTodayMaxMainland(date):
    """Return each player's last winning type-5 mainland of ``date``.

    The result has the columns ``uid`` and ``mainlandId`` (the last
    FinishMainland row per uid with ``mainlandType == 5`` and
    ``isWin == 0``); it is empty when the log file is missing.
    """
    mainland = GetCsvData(LogType.Bill, BillType.FinishMainland.name, date)
    if mainland is None:
        return pd.DataFrame(columns=["uid", "mainlandId"])
    mainland = mainland[(mainland["mainlandType"] == 5) & (mainland["isWin"] == 0)]
    mainland = mainland.drop_duplicates(subset=["uid"], keep="last", inplace=False)
    return mainland[["uid", "mainlandId"]]


# Get the UI pages a player stayed on.
def CalcPlayerStayUI(uid, starTime:datetime, endTime:datetime):
    """Return the last (up to 10) page switches of ``uid`` in a time window.

    Reads the client ``page_do`` rows with ``operate_way == 202`` for every
    day from ``starTime`` to ``endTime``, keeps the rows whose ``timeStamp``
    lies inside the window and adds ``diffTime``, the seconds until the next
    row.  Days without a log file are skipped.
    """
    columns = ["logtime", "timeStamp", "role_id", "leave_page_id", "arrive_page_id"]
    endDate = TimeToDate(endTime)
    # Bound the scan by the calendar span so an end before the start (or a
    # missing day) cannot loop forever.
    span = (endTime.date() - starTime.date()).days
    frames = []
    for day in range(0, span + 1):
        curdate = GetDateNext(starTime, day)
        _data = GetCsvData(LogType.BdcCli, BdcCliType.page_do.name, curdate)
        if _data is not None:
            _data = _data[(_data["role_id"] == uid) & (_data["operate_way"] == 202)]
            frames.append(_data[columns])
        if curdate == endDate:
            break

    csvData = _concat_or_empty(frames, columns=columns)
    start_stamp = int(starTime.timestamp())
    end_stamp = int(endTime.timestamp())
    csvData = csvData[
        (csvData["timeStamp"] >= start_stamp) & (csvData["timeStamp"] <= end_stamp)
    ].copy()
    csvData["diffTime"] = csvData["timeStamp"].shift(-1) - csvData["timeStamp"]
    return csvData.iloc[-10:]


def GetPlayerMaxMainland(uid, endDate):
    """Return the last type-5 mainland ``uid`` won at or before ``endDate``.

    ``endDate`` is a ``"%Y/%m/%d %H:%M"`` string.  Log files are scanned
    backwards one day at a time from that date; 0 is returned once a day
    without a FinishMainland log is reached.
    """
    endTime = datetime.datetime.strptime(endDate, "%Y/%m/%d %H:%M")
    endtimestamp = int(endTime.timestamp())
    day = 0
    while True:
        curdate = GetDateNext(endTime, day)
        _data = GetCsvData(LogType.Bill, BillType.FinishMainland.name, curdate)
        if _data is None:
            return 0
        wins = _data[
            (_data["uid"] == uid)
            & (_data["mainlandType"] == 5)
            & (_data["isWin"] == 0)
            & (_data["timeStamp"] <= endtimestamp)
        ]
        if len(wins) > 0:
            return wins.iloc[-1]["mainlandId"]
        day = day - 1


# Analyze mainline clear time.
def CalcMainlandStateOnline(date, day, server_id = 0):
    """Return per-state online-time statistics of non-paying new players.

    Writes the output of ``CalcMainlineFinishOnline`` to
    ``./out/MainlineFinishOnline_{date}_{day}_{server_id}.csv``, then keeps
    the rows of non-paying created roles on the milestone mainland ids and
    reports, per ``state``, the row count and the mean / median of
    ``onlinetime_state`` divided by 60.
    """
    createRoleData = GetCreateRole(date, day, server_id)
    payPlayer = GetPayPlayer(date, day)
    freePlayer = list(set(createRoleData).difference(set(payPlayer)))

    finishMain = _FINISH_MAIN_IDS + [10000 + i * 100 + 40 for i in range(21, 41)]

    finishData = CalcMainlineFinishOnline(date, day)
    finishData.to_csv(f"./out/MainlineFinishOnline_{date}_{day}_{server_id}.csv", index=False)
    data = finishData[
        (finishData["uid"].isin(freePlayer)) & (finishData["mainlandId"].isin(finishMain))
    ]

    rows = []
    for name, group in data.groupby("state"):
        rows.append({
            "state": name,
            "num": len(group),
            "onlinetime_avg": group["onlinetime_state"].mean() / 60,
            "onlinetime_median": group["onlinetime_state"].median() / 60,
        })
    return pd.DataFrame(rows)


def CalcFuc():
    """One-off analysis for 2021-10-03; writes ``./out/233333.csv``.

    Per milestone mainland id, non-paying new players are ordered by
    ``onlinetime_avg`` and two slices (first 30%, then up to 50%) are
    compared by their first-session online time in hours.  Reads
    ``./out/MainlineFinishOnline_2021-10-03_5.csv``.
    """
    createRoleData = GetCreateRole("2021-10-03", 3)
    payPlayer = GetPayPlayer("2021-10-03", 5)
    freePlayer = list(set(createRoleData).difference(set(payPlayer)))
    fistLogout = CalcFistOnlineTime("2021-10-03", 5)
    fistLogout = fistLogout[fistLogout["uid"].isin(freePlayer)]

    finishData = pd.read_csv("./out/MainlineFinishOnline_2021-10-03_5.csv")
    data = finishData[
        (finishData["uid"].isin(freePlayer))
        & (finishData["mainlandId"].isin(_FINISH_MAIN_IDS))
    ]

    # Named `rows`, not `list`: a local called `list` made the builtin call
    # above fail with UnboundLocalError.
    rows = []
    for name, group in data.groupby("mainlandId"):
        group = group.sort_values(by="onlinetime_avg")
        index30 = max(int(len(group) * 0.3), 1)
        index50 = int(len(group) * 0.5)
        if index30 >= index50:
            index50 = index30 + 1
        datahight = group.iloc[0:index30]
        datalow = group.iloc[index30 + 1:index50 + 1]
        dataHightPlayer = fistLogout[fistLogout["uid"].isin(datahight["uid"].tolist())]
        dataLowPlayer = fistLogout[fistLogout["uid"].isin(datalow["uid"].tolist())]
        rows.append({
            "state": name,
            "num": len(group),
            "hightnum": len(dataHightPlayer),
            "lownum": len(dataLowPlayer),
            "hight_avg": dataHightPlayer["onlineTime"].mean() / 3600,
            "low_avg": dataLowPlayer["onlineTime"].mean() / 3600,
        })
    data = pd.DataFrame(rows)
    data.to_csv(f"./out/233333.csv", index=False)


def CalcDiamonds():
    """Write the daily distribution of player diamond stock to ``./out``.

    Reads two player snapshot CSV files, keeps each account's latest
    snapshot per day and counts accounts per diamond bracket
    (``[0, 3000)``, ``[3000, 6000)``, ``>= 6000``).
    """
    def GetTime(timeStr):
        # Snapshot event time as a Unix timestamp, used for ordering.
        timeArray = datetime.datetime.strptime(timeStr, "%Y-%m-%d %H:%M:%S")
        return int(timeArray.timestamp())

    def FormatTime(timeDate):
        # The first 10 characters of the event time are the date part.
        return timeDate[0:10]

    def FormatInt(value):
        # Quoted numbers arrive as strings; convert them so the bracket
        # comparisons below work on integers.
        if isinstance(value, str):
            return int(value.strip('\"'))
        return int(value)

    def CalcDistribution(date, data):
        diamonds = data["diamonds"]
        return {
            "date": date,
            "[0, 3000)": int(((diamonds >= 0) & (diamonds < 3000)).sum()),
            "[3000, 6000}": int(((diamonds >= 3000) & (diamonds < 6000)).sum()),
            "[6000, +max)": int((diamonds >= 6000).sum()),
        }

    rows = []

    def CalcDiamondsFile(file):
        data = pd.read_csv(file)
        data["date"] = data["事件时间"].map(FormatTime)
        data["time"] = data["事件时间"].map(GetTime)
        data["diamonds"] = data["钻石存量"].map(FormatInt)
        for date, group in data.groupby("date"):
            # Sort first so keep='last' really keeps the latest snapshot.
            group = group.sort_values("time")
            group = group.drop_duplicates(subset=["账户ID"], keep="last", inplace=False)
            rows.append(CalcDistribution(date, group))

    CalcDiamondsFile("./csv/ta/玩家快照10.3-10.9.csv")
    CalcDiamondsFile("./csv/ta/玩家快照11.5-11.11.csv")
    _cavData = pd.DataFrame(rows)
    _cavData.to_csv(f"./out/玩家钻石存量分布.csv", index=False)
    # NOTE(review): completion marker; assumed to belong to this function.
    print("2333")