You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

558 lines
20 KiB

1 month ago
from datetime import date, timedelta
from re import match
from typing import List
import numpy as np
from numpy.core.fromnumeric import mean
from pandas._libs.tslibs.timestamps import Timestamp
from pandas.tseries.offsets import Second
from common.Def import *
from common.CommonFuc import *
import pandas as pd
# Analyze accumulated online time at each finished mainline stage.
def CalcMainlineFinishOnline(date, day):
    """Attach online-time statistics to finished mainline stage records.

    For each of ``day`` consecutive days starting at ``date`` the
    FinishMainland bill log is loaded and reduced to rows with
    ``mainlandType == MainlandType.Mainline`` and ``isWin == 0``.  Every row
    gets an ``onlinetime`` value derived from the most recent non-reonline
    login record at or before the row's ``timeStamp`` (today's login log
    first, then the previous day's).  Per player, the difference between
    consecutive ``onlinetime`` values is then averaged and summed per
    ``state`` (``(mainlandId % 10000) // 100``).

    Args:
        date: Start date, forwarded to ``DateToTime``.
        day: Number of consecutive days to scan.  Scanning stops at the
            first day whose FinishMainland log is missing.

    Returns:
        DataFrame with columns ``logtime, timeStamp, uid, mainlandId,
        onlinetime, onlinetime_diff, state, onlinetime_avg,
        onlinetime_state``; empty (same columns) when nothing matched.
    """
    result_columns = [
        "logtime", "timeStamp", "uid", "mainlandId", "onlinetime",
        "onlinetime_diff", "state", "onlinetime_avg", "onlinetime_state",
    ]

    def _online_time_at(login_frames, uid, timeStamp):
        # Walk the login logs in priority order (today, then yesterday) and
        # extrapolate from the last login at or before the event.
        for frame in login_frames:
            if frame is None:
                continue
            rows = frame[
                (frame["uid"] == uid)
                & (frame["reonline"] == 0)
                & (frame["timeStamp"] <= timeStamp)
            ]
            if len(rows) > 0:
                last = rows.iloc[-1]
                return last.onlineTime + timeStamp - last.timeStamp
        return 0

    def _load_finish_rows():
        frames = []
        for offset in range(day):
            cur_time = DateToTime(date, offset)
            cur_date = TimeToDate(cur_time)
            finish = GetCsvData(LogType.Bill, BillType.FinishMainland.name, cur_date)
            if finish is None:
                break
            login_frames = (
                GetCsvData(LogType.Bill, BillType.Login.name, cur_date),
                GetCsvData(LogType.Bill, BillType.Login.name, GetDateNext(cur_time, -1)),
            )
            finish = finish[
                (finish["mainlandType"] == MainlandType.Mainline)
                & (finish["isWin"] == 0)
            ]
            # Copy so the column assignment below does not write to a slice.
            finish = finish[["logtime", "timeStamp", "uid", "mainlandId"]].copy()
            if finish.empty:
                continue
            finish["onlinetime"] = [
                _online_time_at(login_frames, uid, stamp)
                for uid, stamp in zip(finish["uid"], finish["timeStamp"])
            ]
            frames.append(finish)
        return frames

    frames = _load_finish_rows()
    if not frames:
        return pd.DataFrame(columns=result_columns)

    per_player = []
    for _, group in pd.concat(frames).groupby("uid"):
        group = group.copy()
        # The first record of a player has no predecessor, so its diff is 0.
        group["onlinetime_diff"] = group["onlinetime"].diff().fillna(0)
        group["state"] = (group["mainlandId"].values % 10000) // 100
        per_state = (
            group.groupby("state")["onlinetime_diff"]
            .agg(onlinetime_avg="mean", onlinetime_state="sum")
            .reset_index()
        )
        per_player.append(pd.merge(group, per_state, on="state"))
    if not per_player:
        return pd.DataFrame(columns=result_columns)
    return pd.concat(per_player)
# Filter the bill records that belong to one player.
def CalcPlayerBillData(uid, billtype, date, day):
    """Collect one player's bill-log rows over a range of days.

    Args:
        uid: Player id to match.
        billtype: Value accepted by ``BillType(...)``.
        date: Start date, forwarded to ``DateToTime``.
        day: Number of consecutive days to scan.

    Returns:
        Concatenated DataFrame of matching rows.  For
        ``BillType.MailOpBegin`` a row matches when either ``opUid`` or
        ``ownerUid`` equals ``uid``; otherwise ``uid`` is compared.  Days
        whose log is missing (``GetCsvData`` returned None) are skipped; an
        empty DataFrame is returned when nothing was loaded.
    """
    bill = BillType(billtype)
    frames = []
    for offset in range(day):
        cur_date = TimeToDate(DateToTime(date, offset))
        records = GetCsvData(LogType.Bill, bill.name, cur_date)
        if records is None:
            continue
        if bill == BillType.MailOpBegin:
            mask = (records["opUid"] == uid) | (records["ownerUid"] == uid)
        else:
            mask = records["uid"] == uid
        frames.append(records[mask])
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
# Filter the bdc records that belong to one player.
def CalcPlayerBdcData(uid, bdctype, date, day):
    """Collect one player's bdc-log rows over a range of days.

    Args:
        uid: Player id, compared against the ``role_id`` column.
        bdctype: Value accepted by ``BdcType(...)``.
        date: Start date, forwarded to ``DateToTime``.
        day: Number of consecutive days to scan.

    Returns:
        Concatenated DataFrame of matching rows.  Days whose log is missing
        (``GetCsvData`` returned None) are skipped; an empty DataFrame is
        returned when nothing was loaded.
    """
    bdc = BdcType(bdctype)
    frames = []
    for offset in range(day):
        cur_date = TimeToDate(DateToTime(date, offset))
        records = GetCsvData(LogType.Bdc, bdc.name, cur_date)
        if records is None:
            continue
        frames.append(records[records["role_id"] == uid])
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
# Collect the players that have a successful payment record.
def GetPayPlayer(date, day):
    """Return the uids with a successful payment in the given day range.

    A PayGoogleSuccessRes row counts when ``ret == 0`` and
    ``payStatus == 2``.

    Args:
        date: Start date, forwarded to ``DateToTime``.
        day: Number of consecutive days to scan.

    Returns:
        List of distinct uids.  Days whose log is missing are skipped rather
        than discarding the days already collected; ``[]`` when no log was
        found at all.
    """
    frames = []
    for offset in range(day):
        cur_date = TimeToDate(DateToTime(date, offset))
        payments = GetCsvData(LogType.Bill, BillType.PayGoogleSuccessRes.name, cur_date)
        if payments is None:
            continue
        frames.append(payments[(payments["ret"] == 0) & (payments["payStatus"] == 2)])
    if not frames:
        return []
    paid = pd.concat(frames).drop_duplicates(subset=["uid"], keep="last")
    return paid["uid"].tolist()
# Collect the players that created a role.
def GetCreateRole(date, day = 1, server_id = 0):
    """Return the role ids found in the CreateRole bdc log.

    Args:
        date: Start date, forwarded to ``DateToTime``.
        day: Number of consecutive days to scan.
        server_id: When greater than 0, only rows whose ``server_id`` column
            equals this value are kept.

    Returns:
        List of ``role_id`` values in log order (duplicates are not
        removed).  Days whose log is missing are skipped rather than
        discarding the days already collected; ``[]`` when no log was found.
    """
    frames = []
    for offset in range(day):
        cur_date = TimeToDate(DateToTime(date, offset))
        created = GetCsvData(LogType.Bdc, BdcType.CreateRole.name, cur_date)
        if created is None:
            continue
        if server_id > 0:
            created = created[created["server_id"] == server_id]
        frames.append(created)
    if not frames:
        return []
    return pd.concat(frames)["role_id"].tolist()
# Get the online duration recorded at each player's first logout.
def CalcFistOnlineTime(date, day):
    """Collect Logout rows whose ``loginCount`` equals 1.

    Args:
        date: Start date, forwarded to ``DateToTime``.
        day: Number of consecutive days to scan.

    Returns:
        DataFrame with columns ``logtime, timeStamp, uid, onlineTime``.
        Days whose log is missing are skipped; an empty DataFrame with the
        same columns is returned when nothing was loaded.
    """
    columns = ["logtime", "timeStamp", "uid", "onlineTime"]
    frames = []
    for offset in range(day):
        cur_date = TimeToDate(DateToTime(date, offset))
        logouts = GetCsvData(LogType.Bill, BillType.Logout.name, cur_date)
        if logouts is None:
            continue
        frames.append(logouts.loc[logouts["loginCount"] == 1, columns])
    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames)
def GetFirstDayOnlineTime(date):
    """Estimate each newly created role's online time on its creation day.

    For every role returned by ``GetCreateRole(date)`` the value is the
    ``onlineTime`` of the role's last Logout row of that day (0 if none),
    plus, when a Login row exists after that logout, the time from that
    login up to ``DateToTime(date, 1)``.
    NOTE(review): this presumes ``DateToTime(date, 1)`` is the start of the
    following day and that ``timeStamp`` is in seconds; confirm in helpers.

    Args:
        date: Day to analyse, forwarded to the log helpers.

    Returns:
        DataFrame with columns ``uid`` and ``onlineTime``, one row per
        created role; the columns are present even when there are no roles.
    """
    createRole = GetCreateRole(date)
    loginData = GetCsvData(LogType.Bill, BillType.Login.name, date)
    logoutData = GetCsvData(LogType.Bill, BillType.Logout.name, date)
    day_end = int(DateToTime(date, 1).timestamp())

    # A missing log is treated as "no records" instead of crashing.
    if loginData is None:
        loginData = pd.DataFrame(columns=["uid", "timeStamp"])
    if logoutData is None:
        logoutData = pd.DataFrame(columns=["uid", "timeStamp", "onlineTime"])
    loginData = loginData[loginData["uid"].isin(createRole)]
    logoutData = logoutData[logoutData["uid"].isin(createRole)]

    rows = []
    for uid in createRole:
        online = 0
        last_logout_ts = 0
        logouts = logoutData[logoutData["uid"] == uid]
        if len(logouts) > 0:
            last_logout = logouts.iloc[-1]
            online = last_logout["onlineTime"]
            last_logout_ts = last_logout["timeStamp"]

        # A login after the last logout means the session was still open at
        # the end of the day, so count it up to the day boundary.
        logins = loginData[
            (loginData["uid"] == uid) & (loginData["timeStamp"] > last_logout_ts)
        ]
        if len(logins) > 0:
            login_ts = logins.iloc[0]["timeStamp"]
            if login_ts > 0:
                online = online + day_end - login_ts
        rows.append({"uid": uid, "onlineTime": online})
    return pd.DataFrame(rows, columns=["uid", "onlineTime"])
def CalcDungeon(date, day):
    """Summarise dungeon clear times per layer over a range of days.

    Per-player records come from ``CalcDungeonTime`` for each day.  For each
    ``TodayLayer`` the rows are sorted by ``costtime`` and the mean is taken
    over the slice between the 20% and 80% positions, so the fastest and
    slowest fifths are excluded.  (Groups with a single row therefore get a
    NaN mean, because the slice is empty.)

    Args:
        date: Start date, forwarded to ``DateToTime``.
        day: Number of consecutive days to scan.

    Returns:
        Tuple ``(totalData, data)``: ``totalData`` has columns ``layer, num,
        total, 平均耗时``; ``data`` holds the raw per-player records with
        columns ``uid, date, TodayLayer, costtime``.
    """
    # Gather the enter/finish records of every day.
    records = []
    for offset in range(day):
        cur_date = TimeToDate(DateToTime(date, offset))
        records.extend(CalcDungeonTime(cur_date))
    # Explicit columns keep the groupby below valid when there are no records.
    data = pd.DataFrame(records, columns=["uid", "date", "TodayLayer", "costtime"])

    summary = []
    for layer, group in data.groupby("TodayLayer"):
        # sort_values returns a new frame; the sorted result must be used or
        # the percentile slice would cut arbitrary rows.
        ordered = group.sort_values("costtime")
        lower = int(len(ordered) * 0.2)
        upper = int(len(ordered) * 0.8)
        trimmed = ordered.iloc[lower:upper]
        summary.append({
            "layer": layer,
            "num": len(ordered),
            "total": ordered["costtime"].sum(),
            "平均耗时": trimmed["costtime"].mean(),
        })
    totalData = pd.DataFrame(summary, columns=["layer", "num", "total", "平均耗时"])
    return totalData, data
    # Optional CSV export, disabled in the original:
    #totalData.to_csv(f"./out/CalcDungeon_totalData.csv", index=False)
    #data.to_csv(f"./out/CalcDungeon.csv", index=False)
def CalcDungeonTime(date):
    """Compute per-player dungeon time for players that reached layer 999.

    DungeonOp rows with ``isEnter == 1`` or ``DungeonLayer == 999`` form each
    player's timeline.  The cost is the span from the first to the last of
    those rows, minus the idle part of the first gap in which the player
    left: the time from a DungeonExit record (or, failing that, a Logout
    record) inside the gap up to the next timeline row.

    Args:
        date: Day to analyse, forwarded to ``GetCsvDataShift``.

    Returns:
        List of dicts with keys ``uid, date, TodayLayer, costtime``; players
        without a layer-999 row on that day are omitted.
    """
    def _stamps_for(frame, key, uid):
        # Timestamps of one player's rows; a missing log means no events.
        if frame is None:
            return pd.Series(dtype="float64")
        return frame.loc[frame[key] == uid, "timeStamp"]

    def _first_between(stamps, start, end):
        # First timestamp (in log order) strictly inside (start, end).
        inside = stamps[(stamps > start) & (stamps < end)]
        return inside.iloc[0] if len(inside) > 0 else None

    enterData = GetCsvDataShift(LogType.Bill, BillType.DungeonOp.name, date)
    if enterData is None:
        return []
    enterData = enterData[(enterData["isEnter"] == 1) | (enterData["DungeonLayer"] == 999)]
    exitData = GetCsvDataShift(LogType.Bdc, BdcType.DungeonExit.name, date)
    logoutData = GetCsvDataShift(LogType.Bill, BillType.Logout.name, date)

    datalist = []
    for uid, group in enterData.groupby("uid"):
        completed = group[group["DungeonLayer"] == 999]
        if len(completed) == 0:  # not finished that day: nothing to analyse
            continue
        exit_stamps = _stamps_for(exitData, "role_id", uid)
        logout_stamps = _stamps_for(logoutData, "uid", uid)
        stamps = group["timeStamp"].tolist()

        idle = 0
        for previous, current in zip(stamps, stamps[1:]):
            left_at = _first_between(exit_stamps, previous, current)
            if left_at is None:
                # The original re-tested the exit rows here, so logouts were
                # never considered; use the logout rows as intended.
                left_at = _first_between(logout_stamps, previous, current)
            if left_at is not None:
                idle = current - left_at
                # NOTE(review): only the first interrupted gap is subtracted
                # (kept from the original `break`); confirm whether every
                # gap should be accumulated instead.
                break

        elapsed = group["timeStamp"].diff().fillna(0).sum()
        datalist.append({
            "uid": uid,
            "date": date,
            "TodayLayer": completed["TodayLayer"].tolist()[0],
            "costtime": elapsed - idle,
        })
    return datalist
def GetLoginPlayer(date, isShift = False):
    """Return the distinct uids present in the Login bill log of ``date``.

    ``isShift`` is accepted but not used.  When ``GetCsvData`` returns None
    the result is an empty list.
    """
    logins = GetCsvData(LogType.Bill, BillType.Login.name, date)
    if logins is not None:
        latest_per_uid = logins.drop_duplicates(subset=["uid"], keep="last", inplace=False)
        return latest_per_uid["uid"].tolist()
    return []
# Split newly created players by next-day retention.
def CalcSecondPlayer(date):
    """Partition the roles created on ``date`` by whether they logged in the next day.

    Returns:
        Tuple ``(second, nosecond)``: roles that appear in the following
        day's login list, and roles that do not.
    """
    created = set(GetCreateRole(date, 1))
    following_day = GetDateStrNext(date, 1)
    returned = set(GetLoginPlayer(following_day))
    return list(created & returned), list(created - returned)
def CalcHangupTime(date, day):
    """Compare first-day activity of retained and non-retained new players.

    Over ``day`` days starting at ``date`` this counts, per role, the
    GetMoney rows with ``money_type == 7`` (de-duplicated per day on
    ``role_id`` + ``reason_info``) and splits the roles into next-day
    retained / not retained via ``CalcSecondPlayer``.  A two-row summary
    ("stay" / "leave") with player count, online time, average count of
    those rows and ``istask`` total is written to ``./out/total.csv``; the
    intermediate frames are written next to it.
    NOTE(review): online time and ``istask`` are taken for ``date`` only,
    while the retention lists span all ``day`` days; confirm this is meant.

    Args:
        date: Start date, forwarded to ``DateToTime``.
        day: Number of consecutive days to scan.

    Returns:
        Tuple ``(secondhangupTimeData, nosecondListhangupTimeData)``, each
        with columns ``uid`` and ``time``.
    """
    hangup_frames = []
    secondList = []
    nosecondList = []
    for offset in range(day):
        cur_date = TimeToDate(DateToTime(date, offset))
        money = GetCsvData(LogType.Bdc, BdcType.GetMoney.name, cur_date)
        if money is None:
            # Report and skip the missing log instead of indexing None.
            print(f"getmonyData {cur_date} none")
        else:
            money = money[money["money_type"] == 7]
            hangup_frames.append(
                money.drop_duplicates(subset=["role_id", "reason_info"], keep="last")
            )
        second, nosecond = CalcSecondPlayer(cur_date)
        secondList.extend(second)
        nosecondList.extend(nosecond)

    if hangup_frames:
        counts = pd.concat(hangup_frames).groupby("role_id").size()
        hangupTimeData = pd.DataFrame({"uid": counts.index, "time": counts.values})
    else:
        hangupTimeData = pd.DataFrame(columns=["uid", "time"])
    secondhangupTimeData = hangupTimeData[hangupTimeData["uid"].isin(secondList)]
    nosecondListhangupTimeData = hangupTimeData[hangupTimeData["uid"].isin(nosecondList)]

    firstOnline = GetFirstDayOnlineTime(date)
    taskBoxData = CalcFinish15Box(date)
    firstOnline["online(mine)"] = firstOnline["onlineTime"] / 60

    def _per_player(amount, count):
        # Average that stays defined (NaN) when the cohort is empty.
        return amount / count if count else float("nan")

    def _summary_row(label, uids, hangup):
        # One summary line for a cohort, using that cohort's own hang-up frame.
        count = len(uids)
        total_online = firstOnline[firstOnline["uid"].isin(uids)]["onlineTime"].sum()
        return {
            "type": label,
            "count": count,
            "total_online": total_online,
            "avg_online": _per_player(total_online, count),
            "avg_hanguptime": _per_player(hangup["time"].sum(), count),
            "takeBox": taskBoxData[taskBoxData["uid"].isin(uids)]["istask"].sum(),
        }

    total = pd.DataFrame([
        _summary_row("stay", secondList, secondhangupTimeData),
        # The original used the "stay" frame here, reporting the retained
        # cohort's counts for the players that left.
        _summary_row("leave", nosecondList, nosecondListhangupTimeData),
    ])
    total.to_csv(f"./out/total.csv", index=False)
    firstOnline.to_csv(f"./out/firstOnline.csv", index=False)
    secondhangupTimeData.to_csv(f"./out/secondhangupTimeData.csv", index=False)
    nosecondListhangupTimeData.to_csv(f"./out/nosecondListhangupTimeData.csv", index=False)
    return secondhangupTimeData, nosecondListhangupTimeData
def CalcFinish15Box(date):
    """Flag which newly created roles finished quest 501 in time.

    A role gets ``istask = 1`` when it has a FinishQuest row with
    ``task_id == 501`` and either no FinishMainland row with
    ``mainlandId == 10106`` and ``isWin == 0``, or the first such quest row
    is not later than the first such mainland row.  "First" means first in
    log order, as in the original per-role lookup.

    Args:
        date: Day to analyse, forwarded to the log helpers.

    Returns:
        DataFrame with columns ``uid`` and ``istask`` (0/1), one row per
        role from ``GetCreateRole(date)``; the columns are present even when
        there are no roles.
    """
    def _first_stamp_by(frame, key):
        # Map each id to the timeStamp of its first row in log order.
        firsts = frame.drop_duplicates(subset=[key], keep="first")
        return firsts.set_index(key)["timeStamp"].to_dict()

    createRole = GetCreateRole(date)
    taskData = GetCsvData(LogType.Bdc, BdcType.FinishQuest.name, date)
    finishMainland = GetCsvData(LogType.Bill, BillType.FinishMainland.name, date)

    # A missing log simply contributes no timestamps.
    quest_stamp = {}
    if taskData is not None:
        quest_stamp = _first_stamp_by(taskData[taskData["task_id"] == 501], "role_id")
    stage_stamp = {}
    if finishMainland is not None:
        cleared = finishMainland[
            (finishMainland["mainlandId"] == 10106) & (finishMainland["isWin"] == 0)
        ]
        stage_stamp = _first_stamp_by(cleared, "uid")

    rows = []
    for uid in createRole:
        quest_ts = quest_stamp.get(uid)
        stage_ts = stage_stamp.get(uid)
        took = quest_ts is not None and (stage_ts is None or quest_ts <= stage_ts)
        rows.append({"uid": uid, "istask": 1 if took else 0})
    return pd.DataFrame(rows, columns=["uid", "istask"])
def CalcTodayMaxMainland(date):
    """Return each player's last recorded type-5 mainland stage of the day.

    FinishMainland rows with ``mainlandType == 5`` and ``isWin == 0`` are
    reduced to the last row per ``uid`` (log order).

    Args:
        date: Day to analyse, forwarded to ``GetCsvData``.

    Returns:
        DataFrame with columns ``uid`` and ``mainlandId``; empty with the
        same columns when the log is missing.  (The original computed this
        frame but never returned it.)
    """
    mainland = GetCsvData(LogType.Bill, BillType.FinishMainland.name, date)
    if mainland is None:
        return pd.DataFrame(columns=["uid", "mainlandId"])
    mainland = mainland[(mainland["mainlandType"] == 5) & (mainland["isWin"] == 0)]
    mainland = mainland.drop_duplicates(subset=["uid"], keep="last")
    return mainland[["uid", "mainlandId"]]
# Get the UI pages a player stayed on.
def CalcPlayerStayUI(uid, starTime:datetime, endTime:datetime):
    """Return a player's most recent page transitions within a time window.

    ``page_do`` client rows with ``role_id == uid`` and
    ``operate_way == 202`` are loaded for every day from ``starTime``
    through ``endTime``, limited to the window by ``timeStamp``, and given a
    ``diffTime`` column holding the time until the next row.

    Args:
        uid: Player id, compared against ``role_id``.
        starTime: Window start (object providing ``timestamp()``).
        endTime: Window end (object providing ``timestamp()``).

    Returns:
        DataFrame with columns ``logtime, timeStamp, role_id, leave_page_id,
        arrive_page_id, diffTime`` holding at most the last 10 rows; empty
        when the window is reversed or no log was found.
    """
    columns = ["logtime", "timeStamp", "role_id", "leave_page_id", "arrive_page_id"]
    start_ts = int(starTime.timestamp())
    end_ts = int(endTime.timestamp())
    if end_ts < start_ts:
        return pd.DataFrame(columns=columns + ["diffTime"])

    endDate = TimeToDate(endTime)
    # Upper bound on the calendar days the window can touch, so the walk
    # terminates even if the end date is never matched.
    max_days = (end_ts - start_ts) // 86400 + 2
    frames = []
    for day in range(max_days):
        curdate = GetDateNext(starTime, day)
        pages = GetCsvData(LogType.BdcCli, BdcCliType.page_do.name, curdate)
        if pages is not None:  # skip days without a log
            pages = pages[(pages["role_id"] == uid) & (pages["operate_way"] == 202)]
            frames.append(pages[columns])
        if curdate == endDate:
            break
    if not frames:
        return pd.DataFrame(columns=columns + ["diffTime"])

    csvData = pd.concat(frames)
    in_window = (csvData["timeStamp"] >= start_ts) & (csvData["timeStamp"] <= end_ts)
    # Copy so the new column is not written onto a filtered view.
    csvData = csvData[in_window].copy()
    csvData["diffTime"] = csvData["timeStamp"].shift(-1) - csvData["timeStamp"]
    return csvData.iloc[-10:]
def GetPlayerMaxMainland(uid, endDate):
    """Return the mainlandId of a player's latest type-5 stage record before a time.

    Starting at the day of ``endDate`` and walking backwards one day at a
    time, the FinishMainland log is searched for rows of ``uid`` with
    ``mainlandType == 5``, ``isWin == 0`` and ``timeStamp`` not after
    ``endDate``.  The last matching row (log order) of the first day that has
    any match wins.

    Args:
        uid: Player id.
        endDate: Cut-off time as a ``"%Y/%m/%d %H:%M"`` string.

    Returns:
        The ``mainlandId`` of that row, or 0 once a day without a log file
        is reached.
    """
    endTime = datetime.datetime.strptime(endDate, "%Y/%m/%d %H:%M")
    end_ts = int(endTime.timestamp())
    day = 0
    while True:
        records = GetCsvData(LogType.Bill, BillType.FinishMainland.name, GetDateNext(endTime, day))
        if records is None:
            return 0
        matches = records[
            (records["uid"] == uid)
            & (records["mainlandType"] == 5)
            & (records["isWin"] == 0)
            & (records["timeStamp"] <= end_ts)
        ]
        if len(matches) > 0:
            return matches["mainlandId"].iloc[-1]
        day -= 1
# Analyze the online time needed to clear each mainline chapter.
def CalcMainlandStateOnline(date, day, server_id = 0):
    """Summarise per-state online time for non-paying new players.

    Players are the roles from ``GetCreateRole`` minus those from
    ``GetPayPlayer``.  The frame from ``CalcMainlineFinishOnline`` is first
    written to ``./out`` and then restricted to those players and to a fixed
    list of stage ids; for every ``state`` the row count and the mean and
    median of ``onlinetime_state`` (divided by 60) are reported.

    Returns:
        DataFrame with columns ``state, num, onlinetime_avg,
        onlinetime_median``.
    """
    created = GetCreateRole(date, day, server_id)
    paying = GetPayPlayer(date, day)
    free_uids = list(set(created).difference(set(paying)))

    # Hand-listed stage ids for the first 20 states, then a regular pattern
    # (10000 + state * 100 + 40) for states 21..40.
    stage_ids = [
        10112, 10220, 10324, 10428, 10532, 10636, 10740, 10840, 10940, 11040,
        11140, 11240, 11340, 11440, 11540, 11640, 11740, 11840, 11940, 12040,
    ] + [10000 + state * 100 + 40 for state in range(21, 41)]

    finishData = CalcMainlineFinishOnline(date, day)
    finishData.to_csv(f"./out/MainlineFinishOnline_{date}_{day}_{server_id}.csv", index=False)

    def _state_row(state, rows):
        ordered = rows.sort_values(by="onlinetime_state")
        online = ordered["onlinetime_state"]
        return {
            "state": state,
            "num": len(ordered),
            "onlinetime_avg": online.mean() / 60,
            "onlinetime_median": online.median() / 60,
        }

    wanted = finishData["uid"].isin(free_uids) & finishData["mainlandId"].isin(stage_ids)
    summary = [
        _state_row(state, rows)
        for state, rows in finishData[wanted].groupby("state")
    ]
    return pd.DataFrame(summary)
def CalcFuc():
    """One-off report comparing first-session online time of fast and slower players.

    Uses hard-coded dates and reads a previously exported
    ``MainlineFinishOnline`` CSV.  For each listed stage id the non-paying
    players are ordered by ``onlinetime_avg``; the first ~30% and the
    following slice up to ~50% are looked up in the first-logout data and
    their mean ``onlineTime`` (in hours) is written to ``./out/233333.csv``.
    """
    created = GetCreateRole("2021-10-03", 3)
    paying = GetPayPlayer("2021-10-03", 5)
    free_uids = list(set(created).difference(set(paying)))

    first_logout = CalcFistOnlineTime("2021-10-03", 5)
    first_logout = first_logout[first_logout["uid"].isin(free_uids)]

    stage_ids = [
        10112, 10220, 10324, 10428, 10532, 10636, 10740, 10840, 10940, 11040,
        11140, 11240, 11340, 11440, 11540, 11640, 11740, 11840, 11940, 12040,
    ]
    finishData = pd.read_csv("./out/MainlineFinishOnline_2021-10-03_5.csv")
    selected = finishData[
        finishData["uid"].isin(free_uids) & finishData["mainlandId"].isin(stage_ids)
    ]

    report = []
    for stage_id, players in selected.groupby("mainlandId"):
        ranked = players.sort_values(by="onlinetime_avg")
        # Both cut points are forced to leave at least one row per slice.
        cut30 = max(int(len(ranked) * 0.3), 1)
        cut50 = max(int(len(ranked) * 0.5), cut30 + 1)
        top_slice = ranked.iloc[:cut30]
        # NOTE(review): the row at position cut30 belongs to neither slice;
        # kept as in the original, but this may be an off-by-one.
        next_slice = ranked.iloc[cut30 + 1:cut50 + 1]
        top_players = first_logout[first_logout["uid"].isin(top_slice["uid"].tolist())]
        next_players = first_logout[first_logout["uid"].isin(next_slice["uid"].tolist())]
        report.append({
            "state": stage_id,
            "num": len(ranked),
            "hightnum": len(top_players),
            "lownum": len(next_players),
            "hight_avg": top_players["onlineTime"].mean() / 3600,
            "low_avg": next_players["onlineTime"].mean() / 3600,
        })
    pd.DataFrame(report).to_csv(f"./out/233333.csv", index=False)
def CalcDiamonds():
    """Export the daily distribution of players' diamond balances.

    Two hard-coded snapshot CSVs are read.  Per calendar day (first 10
    characters of ``事件时间``) the latest row of each ``账户ID`` is kept and
    its ``钻石存量`` value is counted into the buckets ``[0, 3000)``,
    ``[3000, 6000)`` and ``>= 6000`` (negative values fall in no bucket).
    The result is written to ``./out/玩家钻石存量分布.csv``.
    """
    def _to_epoch(timeStr):
        # Parse "YYYY-mm-dd HH:MM:SS" into integer epoch seconds.
        parsed = datetime.datetime.strptime(timeStr, "%Y-%m-%d %H:%M:%S")
        return int(parsed.timestamp())

    def _to_int(value):
        # Balances may arrive as quoted strings; always hand back an int so
        # the bucket comparisons below are numeric.
        if isinstance(value, str):
            value = value.strip('\"')
        return int(value)

    def _distribution(day, snapshot):
        # Count balances per bucket; column labels are kept as originally
        # exported.
        diamonds = snapshot["diamonds"]
        return {
            "date": day,
            "[0, 3000)": int(((diamonds >= 0) & (diamonds < 3000)).sum()),
            "[3000, 6000}": int(((diamonds >= 3000) & (diamonds < 6000)).sum()),
            "[6000, +max)": int((diamonds >= 6000).sum()),
        }

    rows = []

    def _collect(file):
        data = pd.read_csv(file)
        data["date"] = data["事件时间"].str[0:10]
        data["time"] = data["事件时间"].map(_to_epoch)
        data["diamonds"] = data["钻石存量"].map(_to_int)
        # sort_values returns a new frame; sorting (stably) before the
        # de-duplication is what makes keep="last" mean "latest snapshot".
        data = data.sort_values("time", kind="mergesort")
        for day, group in data.groupby("date"):
            latest = group.drop_duplicates(subset=["账户ID"], keep="last")
            rows.append(_distribution(day, latest))

    _collect("./csv/ta/玩家快照10.3-10.9.csv")
    _collect("./csv/ta/玩家快照11.5-11.11.csv")
    _cavData = pd.DataFrame(rows)
    _cavData.to_csv(f"./out/玩家钻石存量分布.csv", index=False)
    print("2333")