You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

557 lines
20 KiB

from datetime import date, timedelta
from re import match
from typing import List
import numpy as np
from numpy.core.fromnumeric import mean
from pandas._libs.tslibs.timestamps import Timestamp
from pandas.tseries.offsets import Second
from common.Def import *
from common.CommonFuc import *
import pandas as pd
# Analyze the online time players have accumulated at each main-line clear.
def CalcMainlineFinishOnline(date, day):
    """Annotate main-line clear records with online-time statistics.

    For each of ``day`` consecutive days starting at ``date`` the
    FinishMainland bill log is read (reading stops at the first day whose
    log is missing).  Records are filtered to
    ``mainlandType == MainlandType.Mainline`` and ``isWin == 0``
    (presumably the "cleared" code -- confirm against the log schema) and
    each one gets an ``onlinetime`` value derived from the login logs.

    Per player the function then adds:

    * ``onlinetime_diff``  -- online time elapsed since the player's
      previous record (0 for the first record),
    * ``state``            -- ``(mainlandId % 10000) // 100``,
    * ``onlinetime_avg``   -- mean of ``onlinetime_diff`` within the state,
    * ``onlinetime_state`` -- sum of ``onlinetime_diff`` within the state.

    Returns:
        A DataFrame with the columns listed in ``result_columns`` below;
        it is empty (but still has those columns) when no record is found.
    """
    result_columns = [
        "logtime", "timeStamp", "uid", "mainlandId", "onlinetime",
        "onlinetime_diff", "state", "onlinetime_avg", "onlinetime_state",
    ]

    def _online_time(login_frames, uid, timeStamp):
        # Use the latest non-reconnect login at or before the event, looking
        # at the same-day log first and the previous day's log second.
        for logins in login_frames:
            if logins is None:
                continue
            rows = logins[
                (logins["uid"] == uid)
                & (logins["reonline"] == 0)
                & (logins["timeStamp"] <= timeStamp)
            ]
            if len(rows) > 0:
                last = rows.iloc[-1]
                return last.onlineTime + timeStamp - last.timeStamp
        return 0

    def GetmainlandData():
        frames = []
        for i in range(day):
            curdatetime = DateToTime(date, i)
            curdate = TimeToDate(curdatetime)
            clears = GetCsvData(LogType.Bill, BillType.FinishMainland.name, curdate)
            if clears is None:
                break
            lastdate = GetDateNext(curdatetime, -1)
            login_frames = (
                GetCsvData(LogType.Bill, BillType.Login.name, curdate),
                GetCsvData(LogType.Bill, BillType.Login.name, lastdate),
            )
            clears = clears[
                (clears["mainlandType"] == MainlandType.Mainline)
                & (clears["isWin"] == 0)
            ]
            # Copy so the new column is not written onto a filtered view.
            clears = clears[["logtime", "timeStamp", "uid", "mainlandId"]].copy()
            clears["onlinetime"] = [
                _online_time(login_frames, uid, stamp)
                for uid, stamp in zip(clears["uid"], clears["timeStamp"])
            ]
            frames.append(clears)
        return frames

    frames = GetmainlandData()
    if not frames:
        # pd.concat refuses an empty list; hand back an empty, typed result.
        return pd.DataFrame(columns=result_columns)

    per_player = []
    for _, group in pd.concat(frames).groupby("uid"):
        group = group.copy()
        group["onlinetime_diff"] = group["onlinetime"].diff().fillna(0)
        group["state"] = (group["mainlandId"] % 10000) // 100
        stats = (
            group.groupby("state")["onlinetime_diff"]
            .agg(onlinetime_avg="mean", onlinetime_state="sum")
            .reset_index()
        )
        per_player.append(pd.merge(group, stats, on="state"))

    if not per_player:
        return pd.DataFrame(columns=result_columns)
    return pd.concat(per_player)
# Select the bill-log rows that belong to one player.
def CalcPlayerBillData(uid, billtype, date, day):
    """Collect one player's rows of a given bill log over several days.

    Args:
        uid: Player id to filter on.
        billtype: A ``BillType`` member or a value accepted by ``BillType()``.
        date: First day to read.
        day: Number of consecutive days to read, starting at ``date``.

    Returns:
        The concatenated matching rows.  For ``BillType.MailOpBegin`` a row
        matches when either ``opUid`` or ``ownerUid`` equals ``uid``;
        otherwise the ``uid`` column is compared.  Days whose log is missing
        are skipped, and an empty DataFrame is returned when nothing was
        read.
    """
    if day <= 0:
        return pd.DataFrame()

    bill = BillType(billtype)  # convert once instead of on every iteration
    frames = []
    for i in range(day):
        curdate = TimeToDate(DateToTime(date, i))
        curdata = GetCsvData(LogType.Bill, bill.name, curdate)
        if curdata is None:
            continue
        if bill == BillType.MailOpBegin:
            mask = (curdata["opUid"] == uid) | (curdata["ownerUid"] == uid)
        else:
            mask = curdata["uid"] == uid
        frames.append(curdata[mask])

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
# Select the bdc-log rows that belong to one player.
def CalcPlayerBdcData(uid, bdctype, date, day):
    """Collect one player's rows of a given bdc log over several days.

    Args:
        uid: Player id, compared against the ``role_id`` column.
        bdctype: A ``BdcType`` member or a value accepted by ``BdcType()``.
        date: First day to read.
        day: Number of consecutive days to read, starting at ``date``.

    Returns:
        The concatenated matching rows.  Days whose log is missing are
        skipped, and an empty DataFrame is returned when nothing was read.
    """
    if day <= 0:
        return pd.DataFrame()

    bdc = BdcType(bdctype)  # convert once instead of on every iteration
    frames = []
    for i in range(day):
        curdate = TimeToDate(DateToTime(date, i))
        curdata = GetCsvData(LogType.Bdc, bdc.name, curdate)
        if curdata is None:
            continue
        frames.append(curdata[curdata["role_id"] == uid])

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
# Collect the players who made a successful payment.
def GetPayPlayer(date, day):
    """Return the uids with a successful payment in the given day range.

    Reads the PayGoogleSuccessRes bill log for ``day`` consecutive days
    starting at ``date`` and keeps rows with ``ret == 0`` and
    ``payStatus == 2``.

    A day whose log is missing is skipped, so one absent file no longer
    throws away the payments already collected from the other days.

    Returns:
        A list of distinct uids; ``[]`` when no log could be read.
    """
    frames = []
    for i in range(day):
        curdate = TimeToDate(DateToTime(date, i))
        csvdata = GetCsvData(LogType.Bill, BillType.PayGoogleSuccessRes.name, curdate)
        if csvdata is None:
            continue
        frames.append(csvdata[(csvdata["ret"] == 0) & (csvdata["payStatus"] == 2)])

    if not frames:
        # pd.concat raises on an empty list (e.g. day == 0).
        return []
    data = pd.concat(frames)
    data = data.drop_duplicates(subset=["uid"], keep='last')
    return data["uid"].tolist()
# Collect the players who created a role.
def GetCreateRole(date, day = 1, server_id = 0):
    """Return the role ids created in the given day range.

    Args:
        date: First day to read.
        day: Number of consecutive days to read, starting at ``date``.
        server_id: When greater than 0, only rows with this ``server_id``
            are kept; otherwise all servers are included.

    A day whose CreateRole log is missing is skipped, so one absent file no
    longer throws away the roles already collected from the other days.

    Returns:
        The ``role_id`` values in log order (duplicates are not removed);
        ``[]`` when no log could be read.
    """
    frames = []
    for i in range(day):
        curdate = TimeToDate(DateToTime(date, i))
        csvdata = GetCsvData(LogType.Bdc, BdcType.CreateRole.name, curdate)
        if csvdata is None:
            continue
        if server_id > 0:
            csvdata = csvdata[csvdata["server_id"] == server_id]
        frames.append(csvdata)

    if not frames:
        # pd.concat raises on an empty list (e.g. day == 0).
        return []
    return pd.concat(frames)["role_id"].tolist()
# Get the online time of each player's first session.
def CalcFistOnlineTime(date, day):
    """Return the Logout records whose ``loginCount`` equals 1.

    Reads the Logout bill log for ``day`` consecutive days starting at
    ``date``.  Days whose log is missing are skipped.

    Returns:
        A DataFrame with columns ``logtime``, ``timeStamp``, ``uid`` and
        ``onlineTime``; empty (with those columns) when nothing was read.
    """
    columns = ["logtime", "timeStamp", "uid", "onlineTime"]
    frames = []
    for i in range(day):
        curdate = TimeToDate(DateToTime(date, i))
        csvdata = GetCsvData(LogType.Bill, BillType.Logout.name, curdate)
        if csvdata is None:
            continue
        frames.append(csvdata[csvdata["loginCount"] == 1][columns])

    if not frames:
        # pd.concat raises on an empty list; keep the schema for callers.
        return pd.DataFrame(columns=columns)
    return pd.concat(frames)
def GetFirstDayOnlineTime(date):
    """Compute the creation-day online time of every role created on ``date``.

    For each created role the result is the ``onlineTime`` of its last
    Logout record of the day (0 if there is none).  If a Login record exists
    after that last logout, the time from that login until
    ``DateToTime(date, 1)`` is added on top -- presumably that timestamp is
    the end of the day, i.e. the player was still online; confirm against
    ``DateToTime``.

    A missing Login or Logout log is treated as "no records".

    Returns:
        A DataFrame with columns ``uid`` and ``onlineTime`` (one row per
        created role); the columns are present even when no role was
        created.
    """
    createRole = GetCreateRole(date)
    loginData = GetCsvData(LogType.Bill, BillType.Login.name, date)
    if loginData is not None:
        loginData = loginData[loginData["uid"].isin(createRole)]
    logoutData = GetCsvData(LogType.Bill, BillType.Logout.name, date)
    if logoutData is not None:
        logoutData = logoutData[logoutData["uid"].isin(createRole)]
    day_end = int(DateToTime(date, 1).timestamp())

    rows = []
    for uid in createRole:
        online = 0
        last_logout_time = 0
        if logoutData is not None:
            logout = logoutData[logoutData["uid"] == uid]
            if len(logout) > 0:
                last_logout = logout.iloc[-1]
                online = last_logout["onlineTime"]
                last_logout_time = last_logout["timeStamp"]

        if loginData is not None:
            # First login that has no logout after it.
            login = loginData[
                (loginData["uid"] == uid)
                & (loginData["timeStamp"] > last_logout_time)
            ]
            if len(login) > 0:
                logintime = login.iloc[0]["timeStamp"]
                if logintime > 0:
                    online = online + day_end - logintime

        rows.append({"uid": uid, "onlineTime": online})

    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(rows, columns=["uid", "onlineTime"])
def CalcDungeon(date, day):
    """Summarise dungeon completion time per layer over several days.

    Collects the per-player records produced by ``CalcDungeonTime`` for
    ``day`` consecutive days starting at ``date`` and, for every
    ``TodayLayer`` value, reports the number of runs, the total cost time
    and a trimmed mean that ignores the fastest 20% and slowest 20% of
    runs.

    Returns:
        ``(totalData, data)`` where ``totalData`` has columns ``layer``,
        ``num``, ``total`` and ``平均耗时`` and ``data`` holds the raw
        per-player records (``uid``, ``date``, ``TodayLayer``,
        ``costtime``).  Both are empty, with their columns, when there are
        no records.
    """
    records = []
    for i in range(day):
        curdate = TimeToDate(DateToTime(date, i))
        records.extend(CalcDungeonTime(curdate))
    # Explicit columns so an empty result can still be grouped below.
    data = pd.DataFrame(records, columns=["uid", "date", "TodayLayer", "costtime"])

    summary = []
    for layer, group in data.groupby("TodayLayer"):
        # sort_values returns a new frame; the result must be kept or the
        # percentile slice below is taken from unsorted rows.
        group = group.sort_values("costtime")
        p20 = int(len(group) * 0.2)
        p80 = int(len(group) * 0.8)
        center = group.iloc[p20:p80]
        if center.empty:
            # A single-run layer has an empty 20%-80% window; fall back to
            # all rows instead of reporting NaN.
            center = group
        summary.append({
            "layer": layer,
            "num": len(group),
            "total": group["costtime"].sum(),
            "平均耗时": center["costtime"].mean(),
        })

    totalData = pd.DataFrame(summary, columns=["layer", "num", "total", "平均耗时"])
    return totalData, data
def CalcDungeonTime(date):
    """Compute each player's dungeon cost time for one day.

    Only DungeonOp rows with ``isEnter == 1`` or ``DungeonLayer == 999`` are
    considered, and only players that have at least one
    ``DungeonLayer == 999`` row (the original comment reads: "skip players
    who did not finish that day").

    The cost time is the span between the player's first and last
    considered row, minus the time spent away in the first gap that
    contains a DungeonExit record or, failing that, a Logout record.

    Returns:
        A list of dicts with keys ``uid``, ``date``, ``TodayLayer`` and
        ``costtime``.
    """
    enterData = GetCsvDataShift(LogType.Bill, BillType.DungeonOp.name, date)
    enterData = enterData[(enterData["isEnter"] == 1) | (enterData["DungeonLayer"] == 999)]
    exitData = GetCsvDataShift(LogType.Bdc, BdcType.DungeonExit.name, date)
    logoutData = GetCsvDataShift(LogType.Bill, BillType.Logout.name, date)

    datalist = []
    for uid, group in enterData.groupby("uid"):
        comple = group[group["DungeonLayer"] == 999]
        if len(comple) == 0:
            # No completion that day: nothing to analyse for this player.
            continue

        timeList = group["timeStamp"].tolist()
        # Sum of consecutive differences (first row counts as 0).
        total_span = group["timeStamp"].diff().fillna(0).sum()
        exit_times = exitData[exitData["role_id"] == uid]["timeStamp"]
        logout_times = logoutData[logoutData["uid"] == uid]["timeStamp"]

        subtime = 0
        for prev, cur in zip(timeList, timeList[1:]):
            left = exit_times[(exit_times > prev) & (exit_times < cur)]
            if len(left) == 0:
                # The original re-tested the exit frame here, so the logout
                # fallback could never run; look at the logout records.
                left = logout_times[(logout_times > prev) & (logout_times < cur)]
            if len(left) > 0:
                subtime = cur - left.tolist()[0]
                # NOTE(review): only the first away-gap is subtracted, as in
                # the original; confirm whether later gaps should count too.
                break

        datalist.append({
            "uid": uid,
            "date": date,
            "TodayLayer": (comple["TodayLayer"].tolist())[0],
            "costtime": total_span - subtime,
        })
    return datalist
def GetLoginPlayer(date, isShift = False):
    """Return the distinct uids found in the Login bill log of ``date``.

    Returns ``[]`` when the log is missing.  Each uid appears once, at the
    position of its last login row.

    NOTE(review): ``isShift`` is accepted but not used by this function.
    """
    logins = GetCsvData(LogType.Bill, BillType.Login.name, date)
    if logins is None:
        return []
    return logins["uid"].drop_duplicates(keep='last').tolist()
# Split a day's new players by next-day retention.
def CalcSecondPlayer(date):
    """Partition the roles created on ``date`` by whether they logged in the next day.

    Returns:
        ``(second, nosecond)`` -- two lists: created roles that appear in
        the next day's login list, and created roles that do not.
    """
    created = set(GetCreateRole(date, 1))
    next_day_logins = set(GetLoginPlayer(GetDateStrNext(date, 1)))
    returned = created & next_day_logins
    lost = created - next_day_logins
    return list(returned), list(lost)
def CalcHangupTime(date, day):
    """Compare GetMoney (``money_type == 7``) counts of retained and lost new players.

    For ``day`` consecutive days starting at ``date`` the GetMoney bdc log
    is filtered to ``money_type == 7`` (presumably the hang-up reward
    currency -- confirm), de-duplicated per ``(role_id, reason_info)`` and
    counted per player.  New players of each day are split with
    ``CalcSecondPlayer`` into those who logged in the next day ("stay") and
    those who did not ("leave").

    A day whose GetMoney log is missing is reported and skipped entirely.

    Side effects:
        Writes ``./out/total.csv``, ``./out/firstOnline.csv``,
        ``./out/secondhangupTimeData.csv`` and
        ``./out/nosecondListhangupTimeData.csv``.

    NOTE(review): first-day online time and the box statistic are computed
    for ``date`` only, while the cohorts span all ``day`` days.

    Returns:
        ``(secondhangupTimeData, nosecondListhangupTimeData)`` -- per-player
        counts (columns ``uid``, ``time``) for the two cohorts.
    """
    def _ratio(total, count):
        # An empty cohort has no meaningful average.
        return total / count if count else float("nan")

    hangupList = []
    secondList = []
    nosecondList = []
    for i in range(day):
        curdate = TimeToDate(DateToTime(date, i))
        getmonyData = GetCsvData(LogType.Bdc, BdcType.GetMoney.name, curdate)
        if getmonyData is None:
            print(f"getmonyData {curdate} none")
            # The original fell through here and crashed on the subscript.
            continue
        getmonyData = getmonyData[getmonyData["money_type"] == 7]
        getmonyData = getmonyData.drop_duplicates(subset=["role_id", "reason_info"], keep='last')
        hangupList.append(getmonyData)
        second, nosecond = CalcSecondPlayer(curdate)
        secondList.extend(second)
        nosecondList.extend(nosecond)

    if hangupList:
        hangupData = pd.concat(hangupList)
    else:
        hangupData = pd.DataFrame(columns=["role_id"])
    counts = [
        {"uid": uid, "time": len(group)}
        for uid, group in hangupData.groupby("role_id")
    ]
    hangupTimeData = pd.DataFrame(counts, columns=["uid", "time"])
    secondhangupTimeData = hangupTimeData[hangupTimeData["uid"].isin(secondList)]
    nosecondListhangupTimeData = hangupTimeData[hangupTimeData["uid"].isin(nosecondList)]

    firstOnline = GetFirstDayOnlineTime(date)
    taskBoxData = CalcFinish15Box(date)
    firstOnline["online(mine)"] = firstOnline["onlineTime"] / 60

    def _summary(label, uids, hangup):
        count = len(uids)
        total_online = firstOnline[firstOnline["uid"].isin(uids)]["onlineTime"].sum()
        return {
            "type": label,
            "count": count,
            "total_online": total_online,
            "avg_online": _ratio(total_online, count),
            "avg_hanguptime": _ratio(hangup["time"].sum(), count),
            "takeBox": taskBoxData[taskBoxData["uid"].isin(uids)]["istask"].sum(),
        }

    total = pd.DataFrame([
        _summary("stay", secondList, secondhangupTimeData),
        # The "leave" row must use the lost players' own counts; the
        # original reused the retained players' frame here.
        _summary("leave", nosecondList, nosecondListhangupTimeData),
    ])

    total.to_csv("./out/total.csv", index=False)
    firstOnline.to_csv("./out/firstOnline.csv", index=False)
    secondhangupTimeData.to_csv("./out/secondhangupTimeData.csv", index=False)
    nosecondListhangupTimeData.to_csv("./out/nosecondListhangupTimeData.csv", index=False)
    return secondhangupTimeData, nosecondListhangupTimeData
def CalcFinish15Box(date):
    """Flag which roles created on ``date`` finished quest 501 in time.

    A role gets ``istask == 1`` when it has a FinishQuest record with
    ``task_id == 501`` and either has no FinishMainland record with
    ``mainlandId == 10106`` and ``isWin == 0``, or its first such quest
    record is not later than its first such mainland record.  Otherwise
    ``istask`` is 0.

    A missing FinishQuest or FinishMainland log is treated as "no records".

    Returns:
        A DataFrame with columns ``uid`` and ``istask`` (one row per created
        role); the columns are present even when no role was created.
    """
    createRole = GetCreateRole(date)

    taskData = GetCsvData(LogType.Bdc, BdcType.FinishQuest.name, date)
    if taskData is not None:
        taskData = taskData[
            (taskData["task_id"] == 501) & (taskData["role_id"].isin(createRole))
        ]

    finishMainland = GetCsvData(LogType.Bill, BillType.FinishMainland.name, date)
    if finishMainland is not None:
        finishMainland = finishMainland[
            (finishMainland["mainlandId"] == 10106)
            & (finishMainland["isWin"] == 0)
            & (finishMainland["uid"].isin(createRole))
        ]

    def _first_stamp(frame, id_column, uid):
        # Timestamp of the player's first matching row, or None.
        if frame is None:
            return None
        rows = frame[frame[id_column] == uid]
        if len(rows) == 0:
            return None
        return rows["timeStamp"].iloc[0]

    result = []
    for uid in createRole:
        task_time = _first_stamp(taskData, "role_id", uid)
        mainland_time = _first_stamp(finishMainland, "uid", uid)
        istask = task_time is not None and (
            mainland_time is None or task_time <= mainland_time
        )
        result.append({"uid": uid, "istask": 1 if istask else 0})

    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(result, columns=["uid", "istask"])
def CalcTodayMaxMainland(date):
    """Return each player's last cleared mainland record of ``date``.

    Keeps FinishMainland rows with ``mainlandType == 5`` and ``isWin == 0``
    and, per ``uid``, only the last such row.

    NOTE(review): 5 is presumably the value of ``MainlandType.Mainline``
    used elsewhere in this file -- confirm.

    Returns:
        A DataFrame with columns ``uid`` and ``mainlandId``; empty when the
        log is missing.  (The original computed this frame but never
        returned it.)
    """
    columns = ["uid", "mainlandId"]
    mainland = GetCsvData(LogType.Bill, BillType.FinishMainland.name, date)
    if mainland is None:
        return pd.DataFrame(columns=columns)
    mainland = mainland[(mainland["mainlandType"] == 5) & (mainland["isWin"] == 0)]
    mainland = mainland.drop_duplicates(subset=["uid"], keep='last')
    return mainland[columns]
# Get the UI pages a player stayed on.
def CalcPlayerStayUI(uid, starTime:datetime, endTime:datetime):
    """Return a player's last page transitions inside a time window.

    Reads the client ``page_do`` log for every calendar day from
    ``starTime`` to ``endTime``, keeps the player's rows with
    ``operate_way == 202`` whose ``timeStamp`` lies inside the window, and
    adds ``diffTime``: the time until the next kept row (NaN for the last
    one).

    The number of days read is derived from the two timestamps, so a
    reversed range terminates immediately instead of looping forever.
    Days whose log is missing are skipped.

    Args:
        uid: Player id, compared against ``role_id``.
        starTime: Window start (a datetime object).
        endTime: Window end (a datetime object).

    Returns:
        At most the last 10 matching rows, with columns ``logtime``,
        ``timeStamp``, ``role_id``, ``leave_page_id``, ``arrive_page_id``
        and ``diffTime``.
    """
    columns = ["logtime", "timeStamp", "role_id", "leave_page_id", "arrive_page_id"]
    span_days = (endTime.date() - starTime.date()).days

    frames = []
    for day in range(span_days + 1):
        curdate = GetDateNext(starTime, day)
        _data = GetCsvData(LogType.BdcCli, BdcCliType.page_do.name, curdate)
        if _data is None:
            continue
        _data = _data[(_data["role_id"] == uid) & (_data["operate_way"] == 202)]
        frames.append(_data[columns])

    if not frames:
        return pd.DataFrame(columns=columns + ["diffTime"])

    csvData = pd.concat(frames)
    start_stamp = int(starTime.timestamp())
    end_stamp = int(endTime.timestamp())
    in_window = (csvData["timeStamp"] >= start_stamp) & (csvData["timeStamp"] <= end_stamp)
    # Copy so the new column is not written onto a filtered view.
    csvData = csvData[in_window].copy()
    csvData["diffTime"] = csvData["timeStamp"].shift(-1) - csvData["timeStamp"]
    return csvData.iloc[-10:]
def GetPlayerMaxMainland(uid, endDate):
    """Return the ``mainlandId`` of the player's latest clear at or before ``endDate``.

    Starting on the day of ``endDate`` and walking backwards one day at a
    time, the FinishMainland log is searched for the player's rows with
    ``mainlandType == 5``, ``isWin == 0`` and a ``timeStamp`` not later than
    ``endDate``.  The last matching row of the first day that has one wins.

    Args:
        uid: Player id.
        endDate: Cut-off as a string in ``"%Y/%m/%d %H:%M"`` format.

    Returns:
        The ``mainlandId`` found, or 0 once a day with no log is reached.
    """
    endTime = datetime.datetime.strptime(endDate, "%Y/%m/%d %H:%M")
    endtimestamp = int(endTime.timestamp())

    day = 0
    while True:
        curdate = GetDateNext(endTime, day)
        _data = GetCsvData(LogType.Bill, BillType.FinishMainland.name, curdate)
        if _data is None:
            # Ran out of logs: the player has no earlier clear on record.
            return 0
        matches = _data[
            (_data["uid"] == uid)
            & (_data["mainlandType"] == 5)
            & (_data["isWin"] == 0)
            & (_data["timeStamp"] <= endtimestamp)
        ]
        if len(matches) > 0:
            return matches["mainlandId"].iloc[-1]
        day -= 1
# Analyze the online time non-paying players need per main-line state.
def CalcMainlandStateOnline(date, day, server_id = 0):
    """Summarise per-state online time of non-paying new players.

    Takes the roles created in the day range (optionally for one server),
    removes the paying players, and restricts the output of
    ``CalcMainlineFinishOnline`` to those players and to a fixed list of
    mainland ids (one per ``state`` value from 1 to 40 -- presumably the
    final level of each state; confirm).

    Side effects:
        Writes the unfiltered ``CalcMainlineFinishOnline`` result to
        ``./out/MainlineFinishOnline_{date}_{day}_{server_id}.csv``.

    Returns:
        A DataFrame with one row per state: ``state``, ``num``,
        ``onlinetime_avg`` and ``onlinetime_median`` (the latter two are
        ``onlinetime_state`` statistics divided by 60).
    """
    # Explicit ids for states 1-20, then the regular pattern for 21-40.
    stage_ids = [10112, 10220, 10324, 10428, 10532, 10636, 10740, 10840, 10940, 11040, 11140, 11240, 11340, 11440, 11540, 11640, 11740, 11840, 11940,12040]
    stage_ids += [10000 + state * 100 + 40 for state in range(21, 41)]

    created = set(GetCreateRole(date, day, server_id))
    paying = set(GetPayPlayer(date, day))
    free_uids = list(created - paying)

    finishData = CalcMainlineFinishOnline(date, day)
    finishData.to_csv(f"./out/MainlineFinishOnline_{date}_{day}_{server_id}.csv", index=False)

    is_free = finishData["uid"].isin(free_uids)
    is_listed = finishData["mainlandId"].isin(stage_ids)
    selected = finishData[is_free & is_listed]

    rows = []
    for state, members in selected.groupby("state"):
        ordered = members.sort_values(by="onlinetime_state")
        spent = ordered["onlinetime_state"]
        rows.append({
            "state": state,
            "num": len(ordered),
            "onlinetime_avg": spent.mean() / 60,
            "onlinetime_median": spent.median() / 60,
        })
    return pd.DataFrame(rows)
def CalcFuc():
    """Ad-hoc report: first-session online time of fast vs. slower free players.

    Uses hard-coded inputs (start date ``2021-10-03`` and a previously
    exported ``./out/MainlineFinishOnline_2021-10-03_5.csv``).  For each
    listed mainland id the non-paying players are ordered by
    ``onlinetime_avg``; the lowest 30% and the following slice up to about
    50% are looked up in the first-session logout data and their mean
    ``onlineTime`` (divided by 3600) is reported.

    Side effects:
        Writes ``./out/233333.csv``.

    NOTE(review): the second slice starts at ``index30 + 1``, so the row at
    position ``index30`` belongs to neither group -- confirm this is
    intended.
    """
    createRoleData = GetCreateRole("2021-10-03", 3)
    payPlayer = GetPayPlayer("2021-10-03", 5)
    # The original later assigned to a local named ``list``, which made this
    # ``list(...)`` call fail with UnboundLocalError; the accumulator below
    # is named ``rows`` so the builtin stays reachable.
    freePlayer = list(set(createRoleData) - set(payPlayer))
    fistLogout = CalcFistOnlineTime("2021-10-03", 5)
    fistLogout = fistLogout[fistLogout["uid"].isin(freePlayer)]
    finishMain = [10112, 10220, 10324, 10428, 10532, 10636, 10740, 10840, 10940, 11040, 11140, 11240, 11340, 11440, 11540, 11640, 11740, 11840, 11940,12040]
    finishData = pd.read_csv("./out/MainlineFinishOnline_2021-10-03_5.csv")
    data = finishData[(finishData["uid"].isin(freePlayer)) & (finishData["mainlandId"].isin(finishMain))]

    rows = []
    for name, group in data.groupby("mainlandId"):
        group = group.sort_values(by="onlinetime_avg")
        # Both slices must contain at least one position.
        index30 = max(int(len(group) * 0.3), 1)
        index50 = int(len(group) * 0.5)
        if index30 >= index50:
            index50 = index30 + 1
        datahight = group.iloc[0:index30]
        datalow = group.iloc[index30 + 1:index50 + 1]
        dataHightPlayer = fistLogout[fistLogout["uid"].isin(datahight["uid"].tolist())]
        dataLowPlayer = fistLogout[fistLogout["uid"].isin(datalow["uid"].tolist())]
        rows.append({
            "state": name,
            "num": len(group),
            "hightnum": len(dataHightPlayer),
            "lownum": len(dataLowPlayer),
            "hight_avg": dataHightPlayer["onlineTime"].mean() / 3600,
            "low_avg": dataLowPlayer["onlineTime"].mean() / 3600,
        })

    pd.DataFrame(rows).to_csv("./out/233333.csv", index=False)
def CalcDiamonds():
    """Export the daily distribution of players' diamond balances.

    Reads two hard-coded player-snapshot CSV files, keeps each account's
    latest snapshot per calendar day and counts the accounts whose balance
    falls in ``[0, 3000)``, ``[3000, 6000)`` and ``[6000, +inf)``.

    Side effects:
        Writes ``./out/玩家钻石存量分布.csv`` and prints ``2333`` when done.
    """
    rows = []

    def GetTime(timeStr):
        # Event time string -> integer epoch seconds (used for ordering).
        timeArray = datetime.datetime.strptime(timeStr, "%Y-%m-%d %H:%M:%S")
        return int(timeArray.timestamp())

    def FormatTime(timeDate):
        # The first 10 characters of the event time are the date part.
        return timeDate[0:10]

    def FormatInt(value):
        # Values may arrive as numbers or as strings wrapped in literal
        # double quotes; always hand back an int so the range comparisons
        # below are numeric (the original returned the stripped string).
        if isinstance(value, str):
            value = value.strip().strip('"')
        return int(value)

    def CalcDistribution(date, data):
        diamonds = data["diamonds"]
        return {
            "date": date,
            "[0, 3000)": int(((diamonds >= 0) & (diamonds < 3000)).sum()),
            "[3000, 6000}": int(((diamonds >= 3000) & (diamonds < 6000)).sum()),
            "[6000, +max)": int((diamonds >= 6000).sum()),
        }

    def CalcDiamondsFile(file):
        data = pd.read_csv(file)
        data["date"] = data["事件时间"].map(FormatTime)
        data["time"] = data["事件时间"].map(GetTime)
        data["diamonds"] = data["钻石存量"].map(FormatInt)
        for date, group in data.groupby("date"):
            # sort_values returns a new frame; keep it so that keep='last'
            # really selects the latest snapshot of each account.
            group = group.sort_values("time", kind="mergesort")
            group = group.drop_duplicates(subset=["账户ID"], keep='last')
            rows.append(CalcDistribution(date, group))

    CalcDiamondsFile("./csv/ta/玩家快照10.3-10.9.csv")
    CalcDiamondsFile("./csv/ta/玩家快照11.5-11.11.csv")
    _cavData = pd.DataFrame(rows)
    _cavData.to_csv("./out/玩家钻石存量分布.csv", index=False)
    print("2333")