import time
import datetime
import json
# Identifiers handed to DFF.SRC() to obtain the DataKit and MySQL data sources.
datakit_id = 'datakit'
mysql_id='mysql'

# Constants for the metering data alert checks: the fluctuation ratio and the
# per-item minimum value below which fluctuation is not evaluated.
const_fluctuation = 0.5
const_min_count = {
    "logging": 500 * 10000,
    "tracing": 500 * 10000,
    "rum_pv": 5 * 10000,
    "timeseries": 2 * 10000,
    "job_runs": 0.1 * 10000,
    "profile": 10 * 10000,
    "session_replay": 0.5 * 10000,
    "data_forwarding_default_bytes": 20 * 1024 * 1024 * 1024 # evaluates to 20 GiB; NOTE(review): this comment used to say 50 GB - confirm the intended threshold
}

@DFF.API('df_biz_cc_task')
def df_biz_cc_task():
    """Entry point registered through ``DFF.API``; runs the metering data check.

    Always returns ``None``.
    """
    check_metering_data()

def _fetch_data_from_mysql(query):
    """Run ``query`` on the data source obtained via ``DFF.SRC(mysql_id)``.

    Returns whatever the data source's ``query`` method returns.
    """
    return DFF.SRC(mysql_id).query(query)

def _get_yesterday_date():
    """Return the date one day before ``datetime.datetime.now()`` as ``YYYY-MM-DD``."""
    one_day = datetime.timedelta(days=1)
    moment = datetime.datetime.now() - one_day
    return moment.strftime('%Y-%m-%d')

def _get_before_yesterday_date():
    """Return the date two days before ``datetime.datetime.now()`` as ``YYYY-MM-DD``."""
    two_days = datetime.timedelta(days=2)
    moment = datetime.datetime.now() - two_days
    return moment.strftime('%Y-%m-%d')

def _get_last_two_day_data():
    """Load the ``biz_post_cc_history`` rows dated yesterday and the day before.

    The SQL text and the length of each result are printed as the lookup
    proceeds.

    Returns:
        A ``(yesterday_rows, before_yesterday_rows)`` tuple, or
        ``(None, None)`` when any step of the lookup raises; the error is
        printed in that case.
    """
    try:
        day_minus_1 = _get_yesterday_date()
        day_minus_2 = _get_before_yesterday_date()

        sql_day_minus_1 = f"select * from biz_post_cc_history where date='{day_minus_1}' order by id"
        sql_day_minus_2 = f"select * from biz_post_cc_history where date='{day_minus_2}' order by id"

        print("yesterday SQL: ", sql_day_minus_1)
        print("before yesterday SQL: ", sql_day_minus_2)

        rows_day_minus_1 = _fetch_data_from_mysql(sql_day_minus_1)
        rows_day_minus_2 = _fetch_data_from_mysql(sql_day_minus_2)

        # len() also raises for a result without a length, which lands in
        # the handler below just like a failed query.
        print("yesterday data length: ", len(rows_day_minus_1))
        print("before yesterday data length: ", len(rows_day_minus_2))
    except Exception as e:
        print(f"An error occurred: {e}")
        return None, None
    else:
        return rows_day_minus_1, rows_day_minus_2

def _metering_array_to_dict(data_list):
    """Flatten metering report rows into a dict keyed by workspace UUID.

    For every row the ``detail`` value is decoded as JSON. For each region
    (``cn``, ``sg``) the decoded detail is read for ``<region>Req`` (a JSON
    string decoded into the list of per-workspace items) and
    ``<region>Res`` (a JSON string whose ``msg`` becomes the status).

    Args:
        data_list: iterable of row mappings exposing ``get('detail')``.

    Returns:
        ``{workspace_uuid: {"workspace_uuid", "workspace_name", "stats",
        "status"}}``. ``status`` is ``"error"`` when the response is not
        valid JSON, is not a JSON object, or has an empty/missing ``msg``.
        When the same UUID occurs more than once, the last occurrence wins.

    Raises:
        json.JSONDecodeError: if ``detail`` or a ``<region>Req`` value is
            not valid JSON (only the response is parsed leniently).
    """
    result = {}

    for data in data_list:
        # The detail payload is the same for every region, so decode (and
        # print) it once per row. ``or`` also covers a ``detail`` key that
        # is present but None/empty, which json.loads() cannot handle.
        detail = json.loads(data.get('detail') or '{}')
        print(detail)

        for region in ('cn', 'sg'):
            req = json.loads(detail.get(region + "Req", "") or "[]")

            try:
                res = json.loads(detail.get(region + "Res", "") or "{}")
            except json.JSONDecodeError:
                # An unparsable response is treated as a failed report.
                res = None

            if isinstance(res, dict):
                status = res.get("msg", "") or "error"
            else:
                status = "error"

            for item in req:
                workspace_uuid = item.get('workspace_uuid', '')

                result[workspace_uuid] = {
                    "workspace_uuid": workspace_uuid,
                    "workspace_name": item.get('workspace_name', ''),
                    "stats": item.get('stats', {}),
                    "status": status
                }

    return result

def _logging_check(yesterday_count, before_yesterday_count):
    """Compare per-index logging counts of yesterday against the day before.

    Args:
        yesterday_count: mapping ``index -> {"count": number}`` for
            yesterday; ``None``/empty is treated as no data.
        before_yesterday_count: the same mapping for the day before
            yesterday; ``None``/empty is treated as no data.

    Returns:
        ``(status, result)``. ``status`` is ``"normal"``, ``"discrepancy"``
        (the day-before count exceeds ``const_min_count['logging']`` and the
        relative change exceeds ``const_fluctuation``) or ``"missing"`` (an
        index of the day before is absent yesterday). ``result`` is the
        message of the last anomaly found, or ``None``. Later anomalies
        overwrite earlier ones, so only the last one is reported.
    """
    # Same None handling as _series_check(): a null stats entry means "no data".
    yesterday_count = yesterday_count or {}
    before_yesterday_count = before_yesterday_count or {}

    status = "normal"
    result = None
    for index, before_entry in before_yesterday_count.items():
        if index not in yesterday_count:
            status = "missing"
            # Must be an f-string, otherwise the literal "{index}" is emitted.
            result = f"Index {index}: Logging data missing in yesterday's logs!"
            continue

        by_count = before_entry["count"]
        y_count = yesterday_count[index]["count"]

        # Small volumes fluctuate too much to be meaningful; the threshold
        # also guarantees a non-zero divisor.
        if by_count > const_min_count['logging']:
            discrepancy = abs(by_count - y_count) / by_count
            if discrepancy > const_fluctuation:
                status = "discrepancy"
                result = f"Index {index}: Logging discrepancy detected! Before Yesterday Count: {by_count}, Yesterday Count: {y_count}, Discrepancy: {discrepancy * 100:.2f}%, minimum fluctuation detection value: {const_min_count['logging']}"

    return status, result

def _series_check(yesterday_count, before_yesterday_count):
    """Compare per-measurement time series counts of the last two days.

    Args:
        yesterday_count: mapping ``measurement -> {"count": number}`` for
            yesterday; falsy values are treated as an empty mapping.
        before_yesterday_count: the same mapping for the day before
            yesterday; falsy values are treated as an empty mapping.

    Returns:
        ``(status, result)``. ``status`` is ``"normal"``, ``"discrepancy"``
        (the day-before count exceeds ``const_min_count['timeseries']``,
        yesterday is non-zero and the relative change exceeds
        ``const_fluctuation``) or ``"missing"`` (the measurement is absent
        yesterday, or its count is 0 while the day before was positive).
        ``result`` is the message of the last anomaly found, or ``None``.
        Later anomalies overwrite earlier ones.
    """
    yesterday_count = yesterday_count or {}
    before_yesterday_count = before_yesterday_count or {}

    status = "normal"
    result = None
    for index, before_entry in before_yesterday_count.items():
        if index not in yesterday_count:
            status = "missing"
            result = f"Measurement {index}: series data missing in yesterday's measurements!"
            continue

        by_count = before_entry.get("count", 0)
        y_count = yesterday_count[index].get("count", 0)

        if by_count > const_min_count['timeseries'] and y_count > 0:
            discrepancy = abs(by_count - y_count) / by_count
            # Use the shared fluctuation constant instead of a duplicated literal.
            if discrepancy > const_fluctuation:
                status = "discrepancy"
                result = f"Measurement {index}: series discrepancy detected! Before Yesterday Count: {by_count}, Yesterday Count: {y_count}, Discrepancy: {discrepancy * 100:.2f}%, minimum fluctuation detection value: {const_min_count['timeseries']}"
        elif y_count == 0 and by_count > 0:
            # A drop to zero is reported as missing regardless of the minimum.
            status = "missing"
            result = f"Measurement {index}: series data missing in yesterday's measurements! Before Yesterday Count: {by_count}, Yesterday Count: {y_count}"

    return status, result

def _common_check(data_type, y_count, by_count):
    """Check one scalar metering item for a large day-over-day change.

    Args:
        data_type: key into ``const_min_count`` naming the metering item.
        y_count: yesterday's value; ``None`` is treated as 0.
        by_count: the value of the day before yesterday; ``None`` is
            treated as 0.

    Returns:
        ``("discrepancy", message)`` when ``by_count`` exceeds the item's
        minimum detection value and the relative change exceeds
        ``const_fluctuation``; otherwise ``("normal", None)``.

    Raises:
        KeyError: if ``data_type`` has no entry in ``const_min_count``.

    NOTE(review): a drop to 0 is only reported when ``by_count`` is above
    the minimum; confirm whether below-threshold drops should be flagged.
    """
    # A key that is present with a null value arrives here as None.
    y_count = y_count or 0
    by_count = by_count or 0

    status = "normal"
    result = None
    minimum = const_min_count[data_type]
    # The threshold also guarantees a non-zero divisor.
    if by_count > minimum:
        discrepancy = abs(by_count - y_count) / by_count
        if discrepancy > const_fluctuation:
            status = "discrepancy"
            result = f"{data_type}: {data_type} discrepancy detected! Before Yesterday Count: {by_count}, Yesterday Count: {y_count}, Discrepancy: {discrepancy * 100:.2f}%, minimum fluctuation detection value: {const_min_count[data_type]}"

    return status, result

def _check_metrics(yesterday_stats, before_yesterday_stats):
    """Run every metering check for one workspace.

    Args:
        yesterday_stats: yesterday's stats mapping of the workspace.
        before_yesterday_stats: the stats mapping of the day before.

    Returns:
        A list of ``{"type", "reason", "reason_description"}`` dicts, in the
        order: logging, timeseries, then the scalar items listed below.
    """
    def _as_entry(check_type, outcome):
        # Turn a (status, message) pair into the result record.
        status, reason = outcome
        return {"type": check_type, "reason": status, "reason_description": reason}

    scalar_items = (
        "tracing",
        "rum_pv",
        "job_runs",
        "data_forwarding_default_bytes",
        "profile",
        "session_replay",
    )

    # Logging and time series carry per-index / per-measurement mappings.
    entries = [
        _as_entry(
            "logging",
            _logging_check(yesterday_stats.get("logging", {}), before_yesterday_stats.get("logging", {})),
        ),
        _as_entry(
            "timeseries",
            _series_check(yesterday_stats.get("timeseries", {}), before_yesterday_stats.get("timeseries", {})),
        ),
    ]

    # The remaining items are plain numbers compared by the common check.
    entries.extend(
        _as_entry(
            name,
            _common_check(name, yesterday_stats.get(name, 0), before_yesterday_stats.get(name, 0)),
        )
        for name in scalar_items
    )

    return entries

def _post_to_guance(type, data):
    """Write ``data`` through the DataKit data source.

    Args:
        type: ``"log"`` to call ``write_logging_many`` or ``"metric"`` to
            call ``write_metric_many``.
        data: the list of data items handed to the writer.

    Returns:
        The writer's return value, or ``None`` when ``type`` is not
        recognised (previously this raised ``UnboundLocalError``).
    """
    print(f"type {type}: ", data)

    # Reject unknown types before acquiring the data source.
    if type not in ("log", "metric"):
        print("unknow type")
        return None

    datakit = DFF.SRC(datakit_id)
    if type == "log":
        return datakit.write_logging_many(data)
    return datakit.write_metric_many(data)

def _create_log_data_item(item, status):
    """Build one ``metering`` log data item for a check result.

    Args:
        item: check result mapping; its ``type`` becomes the
            ``metering_project`` tag and the whole mapping is serialised to
            JSON as the ``message`` field.
        status: value stored in the ``status`` tag.

    Returns:
        A dict with ``measurement``, ``tags`` and ``fields`` keys.
    """
    tags = {
        'service': 'daily_metering_reporting',
        'metering_date': _get_yesterday_date(),
        'metering_project': item.get('type', ''),
        'guance_site': DFF.ENV('GUANCE_SITE'),
        'status': status
    }
    fields = {"message": json.dumps(item)}

    return {'measurement': 'metering', 'tags': tags, 'fields': fields}

def _create_metric_data_item(success_workspace, fault_discrepancy_workspace, fault_missing_workspace, fault_post_failed_workspace):
    """Build the ``metering`` metric data item holding the size of each bucket.

    Each argument is a sized collection; only its length is recorded.

    Returns:
        A dict with ``measurement``, ``tags`` and ``fields`` keys.
    """
    tags = {
        'service': 'daily_metering_reporting',
        'guance_site': DFF.ENV('GUANCE_SITE'),
        'metering_date': _get_yesterday_date()
    }

    # Field names are kept exactly as emitted so far (two lack "_count").
    fields = {}
    fields["success_workspace_count"] = len(success_workspace)
    fields["fault_discrepancy_workspace_count"] = len(fault_discrepancy_workspace)
    fields["fault_missing_workspace"] = len(fault_missing_workspace)
    fields["fault_post_failed_workspace"] = len(fault_post_failed_workspace)

    return {'measurement': 'metering', 'tags': tags, 'fields': fields}

def _gen_detection_metrics(check_result_data):
    """Turn the check buckets into log and metric items and post them.

    Args:
        check_result_data: dict with the optional list-valued keys
            ``fault_post_failed_workspace``, ``fault_missing_workspace``,
            ``fault_discrepancy_workspace`` and ``success_workspace``; a
            missing key counts as an empty list.

    Missing and post-failed entries become log items with status ``error``,
    discrepancy entries become log items with status ``warning``, and one
    metric item records the size of every bucket. Always returns ``None``.
    """
    post_failed = check_result_data.get("fault_post_failed_workspace", [])
    missing = check_result_data.get("fault_missing_workspace", [])
    discrepancy = check_result_data.get("fault_discrepancy_workspace", [])
    succeeded = check_result_data.get("success_workspace", [])

    # Errors first, warnings after, matching the order the items are posted in.
    log_data_items = [_create_log_data_item(entry, 'error') for entry in missing + post_failed]
    log_data_items += [_create_log_data_item(entry, 'warning') for entry in discrepancy]

    metric_data_items = [
        _create_metric_data_item(succeeded, discrepancy, missing, post_failed)
    ]

    _post_to_guance("log", log_data_items)
    _post_to_guance("metric", metric_data_items)

# Detection rules for the reported metering data:
#     logging: day-before-yesterday count above 5 million and fluctuation above 50%
#     tracing: day-before-yesterday count above 5 million and fluctuation above 50%
#     active time series: day-before-yesterday count above 20 thousand and fluctuation above 50%
#     every metering item: had volume the day before yesterday, but is 0 or absent yesterday
def check_metering_data():
    """Compare the metering reports of the last two days and post the findings.

    Workspaces are sorted into four buckets: post failed (yesterday's report
    status is not ``"success"``), missing, discrepancy and success. The
    buckets are then handed to ``_gen_detection_metrics``.

    Returns:
        ``None``.

    Raises:
        RuntimeError: if the metering history could not be loaded.
    """
    # Fetch the reported metering data of the last two days.
    yesterday, before_yesterday = _get_last_two_day_data()
    if yesterday is None or before_yesterday is None:
        # The loader signals failure with (None, None); fail with a clear
        # message instead of an obscure TypeError further down.
        raise RuntimeError("failed to load the metering history of the last two days")

    yesterday_dict = _metering_array_to_dict(yesterday)
    before_yesterday_dict = _metering_array_to_dict(before_yesterday)

    # Workspaces whose report call failed yesterday.
    fault_post_failed_workspace = [
        {
            "workspace_uuid": key,
            "workspace_name": val.get('workspace_name', ''),
            "type": "all",
            "reason": "post_failed"
        }
        for key, val in yesterday_dict.items()
        if val.get('status') != "success"
    ]
    fault_missing_workspace = []           # had data the day before yesterday, none yesterday
    fault_discrepancy_workspace = []       # large fluctuation between the two days
    success_workspace = []                 # reported without anomalies

    # Maps a check outcome to the bucket collecting it; other outcomes are ignored.
    buckets = {
        "normal": success_workspace,
        "missing": fault_missing_workspace,
        "discrepancy": fault_discrepancy_workspace,
    }

    # Walk the data of the day before yesterday and compare it with yesterday's.
    for key, val in before_yesterday_dict.items():
        yesterday_data = yesterday_dict.get(key)

        if not yesterday_data:
            # Workspace existed the day before yesterday but is absent yesterday.
            fault_missing_workspace.append({
                "workspace_uuid": key,
                "workspace_name": val.get('workspace_name', ''),
                "type": "all",
                "reason": "missing",
                "reason_description": "complete absence of metering information"
            })
            continue

        # ``or {}`` also covers a ``stats`` key that is present but None.
        yesterday_stats = yesterday_data.get('stats') or {}
        before_yesterday_stats = val.get('stats') or {}

        for metric_check in _check_metrics(yesterday_stats, before_yesterday_stats):
            bucket = buckets.get(metric_check["reason"])
            if bucket is None:
                continue

            bucket.append({
                "workspace_uuid": key,
                "workspace_name": val.get('workspace_name', ''),
                "type": metric_check["type"],
                "reason": metric_check["reason"],
                "reason_description": metric_check["reason_description"]
            })

    # Generate the result metrics/logs and write them to the workspace.
    _gen_detection_metrics({
        "fault_post_failed_workspace": fault_post_failed_workspace,
        "fault_missing_workspace": fault_missing_workspace,
        "fault_discrepancy_workspace": fault_discrepancy_workspace,
        "success_workspace": success_workspace
    })

    return None
