import hashlib
from collections import OrderedDict
from datetime import datetime
from datetime import timedelta

import numpy as np
from odps.models import Column
from odps.models import Partition
from odps.models import Schema

# Business date of this run as a 'YYYYMMDD' string (it is parsed with '%Y%m%d' below).
# `args` is not defined in this file; presumably it is injected by the hosting
# runtime, which is why the undefined-name lint warning is silenced with NOQA.
bizdate = args['bizdate']  # NOQA

# Sentinels usable as 'min'/'max' of a 'uniform' feature config: random_distribution
# replaces them with the 00:00:00 / 23:59:59 timestamps of the day being generated.
TIMESTAMP_DAY_MIN = 'timestamp__day_min'
TIMESTAMP_DAY_MAX = 'timestamp__day_max'

# Column / feature names shared by the configs and the generators.
# The *_HOT_VALUE features are per-row sampling weights; they are used to pick
# users/items non-uniformly and are skipped when the dimension tables are written.
USER_ID = 'user_id'
USER_ID_HOT_VALUE = 'user_id__hot_value'
ITEM_ID = 'item_id'
ITEM_ID_HOT_VALUE = 'item_id__hot_value'
EXP_ID = 'exp_id'
EVENT = 'event'

# Maps the Python 'dtype' declared in a feature config to the column type name
# passed to odps.models.Column.
DTYPE_MAP = {int: 'BIGINT', float: 'DOUBLE', str: 'STRING'}

# Settings consumed by generate_dim_data for the user dimension table.
#   table_name       - target table (taken from the injected `args`).
#   amount           - distribution of the total row count generated per run,
#                      before de-duplication on the id column.
#   new_ratio        - distribution of the share of rows seeded with the current bizdate.
#   overlap_max_days - number of previous days the remaining rows are spread over.
#   features         - one sampling spec per column, interpreted by random_distribution;
#                      'p_disturb' is the std-dev of the Gaussian noise added to 'p',
#                      'dtype' selects the column type via DTYPE_MAP,
#                      'comment' becomes the column comment.
#   seed             - prefix of every seed string used for this table.
USER_CONFIG = {
    'table_name': args['user_table_name'],  # NOQA
    'amount': {
        'random_type': 'uniform',
        'max': 105000,
        'min': 95000,
        'dtype': int
    },
    # overlap: share of rows that are "new" on bizdate; the other rows are
    # generated with the seeds of the previous `overlap_max_days` days, so
    # consecutive runs produce overlapping data.
    'new_ratio': {
        'random_type': 'uniform',
        'max': 0.3,
        'min': 0.2,
        'dtype': float
    },
    'overlap_max_days': 30,
    'features': {
        USER_ID: {
            'random_type': 'uniform',
            'max': 199999999,
            'min': 100000000,
            'dtype': int,
            'comment': '用户唯一ID'
        },
        # Sampling weight of each generated user row (normalised before use);
        # not written to the table.
        USER_ID_HOT_VALUE: {
            'random_type': 'exponential',
            'beta': 0.3,
            'dtype': float,
            'comment': ''
        },
        'gender': {
            'random_type': 'choice',
            'choices': ['male', 'female', None],
            'p': [0.6, 0.3, 0.1],
            'p_disturb': 0.01,
            'dtype': str,
            'comment': '性别'
        },
        'age': {
            'random_type':
            'choice',
            'choices': [
                52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27,
                26, 25, 24, 23, 22, 21, 20, 19, 18
            ],
            'p': [
                0.005, 0.004, 0.004, 0.004, 0.004, 0.005, 0.005, 0.004, 0.006, 0.005, 0.011, 0.006, 0.007, 0.006, 0.006,
                0.007, 0.008, 0.008, 0.010, 0.009, 0.012, 0.009, 0.009, 0.007, 0.680, 0.008, 0.064, 0.008, 0.008, 0.007,
                0.008, 0.046, 0.004, 0.004, 0.002
            ],
            'p_disturb':
            0.001,
            'dtype':
            int,
            'comment':
            '年龄'
        },
        'city': {
            'random_type':
            'choice',
            'choices': [
                None, '北京市', '上海市', '重庆市', '成都市', '广州市', '深圳市', '东莞市', '苏州市', '武汉市', '天津市', '温州市', '杭州市', '西安市', '宁波市',
                '佛山市', '无锡市', '哈尔滨市', '南通市', '长沙市', '南京市', '沈阳市', '合肥市', '保定市', '金华市', '长春市', '大连市', '唐山市', '福州市', '泉州市'
            ],
            'p': [
                0.55, 0.05, 0.04, 0.04, 0.03, 0.02, 0.02, 0.02, 0.02, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
                0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01
            ],
            'p_disturb':
            0.01,
            'dtype':
            str,
            'comment':
            '城市'
        },
        'item_cnt': {
            'random_type':
            'choice',
            'choices': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 19, 22, 25, 29, 34, 41, 51, 66, 93, 157],
            'p': [
                0.857, 0.047, 0.021, 0.013, 0.009, 0.006, 0.005, 0.004, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
                0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002
            ],
            'p_disturb':
            0.001,
            'dtype':
            int,
            'comment':
            '创作内容数'
        },
        'follow_cnt': {
            'random_type':
            'choice',
            'choices': [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28,
                30, 31, 33, 35, 37, 39, 42, 44, 47, 49, 51, 55, 58, 62, 67, 72, 77, 83, 89, 96, 102, 110, 119, 129, 141,
                153, 167, 183, 200, 223, 245, 272, 308, 357, 423, 516, 653, 903, 1579
            ],
            'p': [
                0.576, 0.161, 0.049, 0.025, 0.019, 0.014, 0.008, 0.008, 0.007, 0.006, 0.005, 0.005, 0.004, 0.004, 0.003,
                0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002,
                0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002,
                0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002,
                0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002
            ],
            'p_disturb':
            0.001,
            'dtype':
            int,
            'comment':
            '关注数'
        },
        'follower_cnt': {
            'random_type':
            'choice',
            'choices': [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
                29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 52, 55, 58, 62, 66, 70, 75, 80, 85, 90, 96, 102, 110, 118,
                126, 134, 144, 154, 166, 179, 195, 213, 234, 260, 292, 332, 385, 456, 558, 717, 1016, 1811
            ],
            'p': [
                0.610, 0.116, 0.045, 0.027, 0.018, 0.013, 0.010, 0.009, 0.007, 0.006, 0.006, 0.005, 0.004, 0.004, 0.004,
                0.003, 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002,
                0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002,
                0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002,
                0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002
            ],
            'p_disturb':
            0.001,
            'dtype':
            int,
            'comment':
            '粉丝数'
        },
        # Uniform timestamp within the day the row is seeded with (sentinel bounds).
        'register_time': {
            'random_type': 'uniform',
            'max': TIMESTAMP_DAY_MAX,
            'min': TIMESTAMP_DAY_MIN,
            'dtype': int,
            'comment': '注册时间'
        },
        'tags': {
            'random_type':
            'choice',
            'choices': [
                '0', '1', '2', '1,2', '3', '1,3', '2,3', '1,2,3', '4', '1,4', '2,4', '1,2,4', '3,4', '1,3,4', '2,3,4',
                '1,2,3,4'
            ],
            'p': [
                0.440, 0.383, 0.086, 0.037, 0.011, 0.007, 0.006, 0.004, 0.004, 0.004, 0.003, 0.003, 0.003, 0.003, 0.003,
                0.003
            ],
            'p_disturb':
            0.001,
            'dtype':
            str,
            'comment':
            '用户标签'
        }
    },
    'seed': 'user'
}

# Settings consumed by generate_dim_data for the item (content) dimension table.
# Same layout as the user config: 'amount' / 'new_ratio' / 'overlap_max_days'
# control how many rows are generated and how they are spread over past days,
# 'features' holds one random_distribution spec per column, and 'seed' prefixes
# every seed string used for this table.
ITEM_CONFIG = {
    'table_name': args['item_table_name'],  # NOQA
    'amount': {
        'random_type': 'uniform',
        'max': 52000,
        'min': 48000,
        'dtype': int
    },
    # overlap: share of rows that are "new" on bizdate; the other rows are
    # generated with the seeds of the previous `overlap_max_days` days.
    'new_ratio': {
        'random_type': 'uniform',
        'max': 0.2,
        'min': 0.1,
        'dtype': float
    },
    'overlap_max_days': 30,
    'features': {
        ITEM_ID: {
            'random_type': 'uniform',
            'max': 299999999,
            'min': 200000000,
            'dtype': int,
            'comment': '内容ID'
        },
        # Sampling weight of each generated item row (normalised before use);
        # not written to the table.
        ITEM_ID_HOT_VALUE: {
            'random_type': 'exponential',
            'beta': 0.3,
            'dtype': float,
            'comment': ''
        },
        'duration': {
            'random_type':
            'choice',
            'choices': [
                5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                32, 33, 35, 36, 38, 39, 41, 43, 45, 48, 50, 52, 55, 57, 59, 60, 63, 68, 74, 81, 89, 98, 109, 121, 138,
                159, 184, 215, 251, 300
            ],
            'p': [
                0.01, 0.01, 0.03, 0.03, 0.03, 0.04, 0.04, 0.04, 0.04, 0.05, 0.1, 0.05, 0.04, 0.03, 0.02, 0.02, 0.02,
                0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
                0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
                0.01, 0.01, 0.01, 0.01, 0.01, 0.01
            ],
            'p_disturb':
            0.01,
            'dtype':
            float,
            'comment':
            '视频时长'
        },
        'title': {
            'random_type': 'choice',
            'choices': ['#成语故事', '#健身打卡', '#吉他弹唱', None],
            'p': [0.3, 0.4, 0.2, 0.1],
            'p_disturb': 0.01,
            'dtype': str,
            'comment': '标题'
        },
        # NOTE(review): choices are ints while 'dtype' is str (STRING column) —
        # confirm the table writer accepts/casts these values as intended.
        'category': {
            'random_type':
            'choice',
            'choices':
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27],
            'p': [
                0.06, 0.05, 0.02, 0.01, 0.02, 0.01, 0.03, 0.04, 0.05, 0.01, 0.01, 0.04, 0.01, 0.01, 0.15, 0.01, 0.01,
                0.05, 0.07, 0.01, 0.14, 0.01, 0.01, 0.01, 0.01, 0.02, 0.01, 0.12
            ],
            'p_disturb':
            0.01,
            'dtype':
            str,
            'comment':
            '一级标签'
        },
        # 'choices' / 'p' name features of the user data: random_distribution
        # resolves them from the keyword arguments it receives, so authors are
        # drawn from the generated user ids weighted by their hot values.
        'author': {
            'random_type': 'choice',
            'choices': USER_ID,
            'p': USER_ID_HOT_VALUE,
            'dtype': int,
            'comment': '作者'
        },
        # No 'p_disturb' here: 'p' is passed to np.random.choice unchanged.
        'click_count': {
            'random_type':
            'choice',
            'choices': [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 50, 51, 53, 54, 56, 58,
                60, 62, 64, 66, 69, 72, 75, 78, 81, 85, 89, 94, 99, 105, 112, 120, 129, 139, 150, 164, 183, 210, 250,
                325, 434, 579
            ],
            'p': [
                0.536, 0.041, 0.032, 0.027, 0.023, 0.020, 0.018, 0.016, 0.015, 0.014, 0.013, 0.012, 0.011, 0.011, 0.010,
                0.009, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006, 0.006, 0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004,
                0.004, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002,
                0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002,
                0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002,
                0.002, 0.002, 0.002, 0.002, 0.002
            ],
            'dtype':
            int,
            'comment':
            '累计点击数'
        },
        # No 'p_disturb' here either.
        'praise_count': {
            'random_type':
            'choice',
            'choices': [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 48, 49, 51, 53, 55, 58, 60, 64,
                68, 73, 79, 88, 100, 118, 148, 202, 344
            ],
            'p': [
                0.305, 0.081, 0.060, 0.051, 0.044, 0.040, 0.036, 0.032, 0.029, 0.026, 0.024, 0.022, 0.020, 0.018, 0.016,
                0.014, 0.013, 0.012, 0.011, 0.010, 0.009, 0.008, 0.007, 0.007, 0.006, 0.006, 0.005, 0.005, 0.005, 0.004,
                0.004, 0.004, 0.003, 0.003, 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002,
                0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.001, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002,
                0.002, 0.002, 0.002
            ],
            'dtype':
            int,
            'comment':
            '累计点赞数'
        },
        # Uniform timestamp within the day the row is seeded with (sentinel bounds).
        'pub_time': {
            'random_type': 'uniform',
            'max': TIMESTAMP_DAY_MAX,
            'min': TIMESTAMP_DAY_MIN,
            'dtype': int,
            'comment': '发布时间'
        }
    },
    'seed': 'item'
}

# Settings consumed by generate_behavior_data for the behavior (event log) table.
#   request_amount - distribution of the number of requests generated for bizdate.
#   USER_ID/ITEM_ID - who issued a request / which item a row refers to; both are
#                     drawn from the generated dimension data, weighted by hot value.
#   EXP_ID          - experiment buckets; a user is assigned to a bucket from the
#                     hash of the user id, so 'p' is a traffic split, not a per-row draw.
#   EVENT           - how many rows each request expands to, which event types are
#                     emitted and which extra value columns they carry.
#   features        - per-request columns, sampled once per request and repeated
#                     for every row of that request.
#   seed            - prefix of the seed strings used for this table.
BEHAV_CONFIG = {
    'table_name': args['behavior_table_name'],  # NOQA
    'request_amount': {
        'random_type': 'uniform',
        'max': 105000,
        'min': 95000,
        'dtype': int
    },
    USER_ID: {
        'random_type': 'choice',
        'choices': USER_ID,
        'p': USER_ID_HOT_VALUE,
        'dtype': int,
        'comment': '用户唯一ID'
    },
    ITEM_ID: {
        'random_type': 'choice',
        'choices': ITEM_ID,
        'p': ITEM_ID_HOT_VALUE,
        'dtype': int,
        'comment': '内容ID'
    },
    # Not sampled through random_distribution, hence no 'random_type'.
    EXP_ID: {
        'choices': ['ER2_L2#EG2#E3', 'ER2_L2#EG2#E4'],
        'p': [0.5, 0.5],
        'dtype': str,
        'comment': '实验ID'
    },
    # The uniform spec gives the number of rows each request is repeated into.
    EVENT: {
        'random_type': 'uniform',
        'max': 10,
        'min': 1,
        'dtype': int,
        'comment': '行为类型',
        # Event types are processed in this order; an event with 'depend' can only
        # occur on rows where the event it depends on occurred.
        'event_types': {
            'expr': {
                'exp_vals': [
                    # one entry per exp_id, in the same order as EXP_ID['choices']
                    {
                        # 'rate' is the probability of the event among the rows it depends on;
                        # without 'depend' that is every row of the experiment, so a rate
                        # below 1 mocks missing 'expr' events.
                        'rate': 0.9,
                        'rate_disturb': 0.01
                    },
                    {
                        'rate': 0.9,
                        'rate_disturb': 0.01
                    }
                ]
            },
            'click': {
                'exp_vals': [
                    # one entry per exp_id, in the same order as EXP_ID['choices']
                    {
                        'rate': 0.3,
                        'rate_disturb': 0.01,
                    },
                    {
                        'rate': 0.35,
                        'rate_disturb': 0.01
                    }
                ],
                'depend': {
                    EVENT: 'expr'
                }
            },
            'praise': {
                'exp_vals': [
                    # one entry per exp_id, in the same order as EXP_ID['choices']
                    {
                        'rate': 0.35,
                        'rate_disturb': 0.01,
                    },
                    {
                        'rate': 0.4,
                        'rate_disturb': 0.01
                    }
                ],
                'depend': {
                    EVENT: 'click'
                }
            }
        },
        # Extra numeric columns; the value is non-zero only on rows of the
        # experiment whose event equals the one named in 'depend'.
        'event_values': {
            'playtime': {
                'exp_vals': [
                    # one entry per exp_id, in the same order as EXP_ID['choices']
                    {
                        'random_type': 'uniform',
                        'max': 100,
                        'min': 0,
                        'dtype': float
                    },
                    {
                        'random_type': 'uniform',
                        'max': 150,
                        'min': 0,
                        'dtype': float
                    }
                ],
                'depend': {
                    EVENT: 'click'
                },
                'comment':
                '播放时长/阅读时长'
            }
        }
    },
    'features': {
        'page': {
            'random_type': 'choice',
            'choices': ['home', 'detail'],
            'p': [0.3, 0.7],
            'p_disturb': 0.01,
            'dtype': str,
            'comment': '页面'
        },
        'net_type': {
            'random_type': 'choice',
            'choices': ['4g', 'wifi'],
            'p': [0.2, 0.8],
            'p_disturb': 0.01,
            'dtype': str,
            'comment': '网络型号'
        },
        # Uniform timestamp within bizdate (sentinel bounds).
        'event_time': {
            'random_type': 'uniform',
            'max': TIMESTAMP_DAY_MAX,
            'min': TIMESTAMP_DAY_MIN,
            'dtype': int,
            'comment': '行为时间'
        }
    },
    'seed': 'bhv'
}


def hash_seed(seed):
    """Turn a seed string into a deterministic unsigned 32-bit integer.

    The string is MD5-hashed and the first four bytes of the digest are read
    as a little-endian integer, so the result always lies in ``[0, 2**32)``.

    Args:
        seed: Text identifying the random stream.

    Returns:
        The integer derived from the digest.
    """
    digest = hashlib.md5(seed.encode()).digest()
    leading_bytes = digest[0:4]
    return int.from_bytes(leading_bytes, byteorder='little')


def random_distribution(config, size=None, seed=None, **kwargs):
    """Draw random values as described by a feature config.

    Supported ``config['random_type']`` values:

    * ``'uniform'``: uniform between ``config['min']`` and ``config['max']``,
      cast to ``config['dtype']``. The TIMESTAMP_DAY_MIN / TIMESTAMP_DAY_MAX
      sentinels are replaced by the 00:00:00 / 23:59:59 timestamps of
      ``kwargs['bizdate']``.
    * ``'choice'``: draw from ``config['choices']`` with probabilities
      ``config['p']``. When either is the name of an id / hot-value feature,
      the actual values are taken from ``kwargs[name]['value']`` (hot values
      are normalised to sum to one).
    * ``'choice_bool'``: draw True with probability ``config['p_true']``.
    * ``'exponential'``: exponential with scale ``config['beta']``.

    For the two choice types an optional ``config['p_disturb']`` adds
    Gaussian noise to the probabilities via random_disturb.

    Args:
        config: Distribution spec (see above).
        size: Number of samples; ``None`` yields a single scalar.
        seed: Optional seed string; when truthy the global NumPy RNG is
            re-seeded from it before sampling.
        **kwargs: ``bizdate`` and/or feature dicts referenced by the config.

    Returns:
        A scalar or NumPy array of samples.

    Raises:
        ValueError: If ``config['random_type']`` is not supported.
    """
    if seed:
        np.random.seed(hash_seed(seed))

    kind = config['random_type']

    if kind == 'uniform':

        def day_timestamp(clock):
            # Timestamp of the given HHMMSS clock time on the requested bizdate.
            return datetime.strptime(kwargs['bizdate'] + clock, '%Y%m%d%H%M%S').timestamp()

        low = day_timestamp('000000') if config['min'] == TIMESTAMP_DAY_MIN else config['min']
        high = day_timestamp('235959') if config['max'] == TIMESTAMP_DAY_MAX else config['max']
        sample = np.random.uniform(low, high, size=size)
        if size is None:
            return config['dtype'](sample)
        return sample.astype(config['dtype'])

    if kind == 'choice':
        # Resolve symbolic references to generated id columns.
        options = config['choices']
        for id_key in (USER_ID, ITEM_ID):
            if options == id_key:
                options = kwargs.get(id_key)['value']
                break

        # Resolve symbolic references to hot values and normalise them.
        weights = config['p']
        for hot_key in (USER_ID_HOT_VALUE, ITEM_ID_HOT_VALUE):
            if weights == hot_key:
                weights = kwargs.get(hot_key)['value'] / np.sum(kwargs.get(hot_key)['value'])
                break

        if 'p_disturb' in config:
            weights = random_disturb(weights, norm_scale=config['p_disturb'], force_positive=True)
        return np.random.choice(options, size=size, p=weights)

    if kind == 'choice_bool':
        weights = [1 - config['p_true'], config['p_true']]
        if 'p_disturb' in config:
            weights = random_disturb(weights, norm_scale=config['p_disturb'], force_positive=True)
        return np.random.choice([False, True], size=size, p=weights)

    if kind == 'exponential':
        return np.random.exponential(config['beta'], size=size)

    raise ValueError('Unknown random_type %s' % config['random_type'])


def random_disturb(x, norm_scale=0.1, force_positive=False, seed=None):
    """Add clipped Gaussian noise to ``x`` while preserving its total sum.

    Noise is drawn from ``N(0, norm_scale)`` and clipped to three standard
    deviations. The disturbed values are then rescaled so that their sum
    equals the sum of the original ``x``.

    Args:
        x: A number or an array-like of numbers (any dimensionality).
        norm_scale: Standard deviation of the noise.
        force_positive: If True, negative disturbed values are set to zero
            before rescaling.
        seed: Optional seed string; when truthy the global NumPy RNG is
            re-seeded from it before the noise is drawn.

    Returns:
        The disturbed value(s). If the disturbed values sum to zero, so that
        rescaling is impossible, the undisturbed input is returned instead of
        NaN/inf.
    """
    if seed:
        np.random.seed(hash_seed(seed))
    if isinstance(x, (float, int)):
        shape = None
    else:
        x = np.asarray(x)
        # N-D input needs noise of the same shape to broadcast; 0-d/1-d input
        # keeps the flat size so the draws are unchanged for existing callers.
        shape = x.shape if x.ndim > 1 else x.size
    disturb = np.clip(np.random.normal(0, norm_scale, size=shape), -3 * norm_scale, 3 * norm_scale)
    x_new = x + disturb
    if force_positive:
        # Only a lower bound is wanted; an explicit upper bound of max(x_new)
        # would fall below the lower bound when every value is negative.
        x_new = np.maximum(x_new, 0)
    total = np.sum(x_new)
    if total == 0:
        # Cannot rescale an all-zero result; fall back to the undisturbed
        # values (same type/shape as the regular result) rather than NaN.
        return x + 0.0 * disturb
    return x_new / total * np.sum(x)


def generate_dim_data(config, dim_id, infos=None):
    """Generate one day of a dimension table and write it to its partition.

    Rows are generated in slices: one slice seeded with ``bizdate`` and one
    slice for each of the previous ``config['overlap_max_days']`` days. Each
    slice is seeded with its own date, so runs for consecutive bizdates share
    most of their rows. The slice sizes come from ``config['amount']`` and
    ``config['new_ratio']``.

    Before writing, rows are de-duplicated on ``dim_id`` (first occurrence
    wins) and the hot-value helper features are dropped. The table is created
    if needed and the ``ds=<bizdate>`` partition is replaced.

    Args:
        config: Dimension config with 'table_name', 'amount', 'new_ratio',
            'overlap_max_days', 'features' and 'seed'.
        dim_id: Name of the feature that identifies a row.
        infos: Optional mapping of previously generated feature data, passed
            to random_distribution as keyword arguments so that features can
            reference it. Defaults to no extra data.

    Returns:
        OrderedDict mapping feature name to
        ``{'value': list, 'dtype': type, 'comment': str}``. It contains every
        generated row, including duplicates of ``dim_id`` and the hot-value
        features, not only what was written.
    """
    # A None sentinel instead of a shared mutable default dict.
    infos = {} if infos is None else infos

    total_amount = random_distribution(config['amount'], seed=config['seed'] + bizdate + 'total_amount')
    new_ratio = random_distribution(config['new_ratio'], seed=config['seed'] + bizdate + 'new_ratio')
    # Split the non-new share evenly over the previous days, then jitter it.
    old_ratios = random_disturb(
        [(1 - new_ratio) / config['overlap_max_days']] * config['overlap_max_days'],
        norm_scale=(1 - new_ratio) / config['overlap_max_days'] / 6,
        seed=config['seed'] + bizdate + 'old_ratios')
    ratios = [new_ratio] + list(old_ratios)

    biz_datetime = datetime.strptime(bizdate, '%Y%m%d')

    data = OrderedDict()
    for i in range(config['overlap_max_days'] + 1):
        # i == 0 is bizdate itself, i > 0 walks back one day at a time.
        cur_datetime = biz_datetime - timedelta(i)
        curdate = cur_datetime.strftime('%Y%m%d')

        cur_amount = int(total_amount * ratios[i])
        for feature_name, feature_config in config['features'].items():
            if feature_name not in data:
                data[feature_name] = {
                    'value': [],
                    'dtype': feature_config['dtype'],
                    'comment': feature_config['comment']
                }
            # Seeding with curdate (not bizdate) is what makes the slice
            # reproducible across runs for different bizdates.
            data[feature_name]['value'].extend(
                random_distribution(
                    feature_config,
                    size=cur_amount,
                    seed=config['seed'] + curdate + feature_name,
                    bizdate=curdate,
                    **infos))

    # Keep the first occurrence of every id, in generation order.
    unique_index = np.sort(np.unique(data[dim_id]['value'], return_index=True)[1])
    columns = []
    columns_data = []
    for feature_name, feature_value in data.items():
        # Hot values are sampling weights only; they are not table columns.
        if feature_name not in (USER_ID_HOT_VALUE, ITEM_ID_HOT_VALUE):
            columns.append(
                Column(name=feature_name, type=DTYPE_MAP[feature_value['dtype']], comment=feature_value['comment']))
            columns_data.append(np.asarray(feature_value['value'])[unique_index])
    partitions = [Partition(name='ds', type='string')]
    # The partition value is appended as the last field of every record.
    columns_data.append([bizdate] * len(columns_data[0]))
    schema = Schema(columns=columns, partitions=partitions)

    t = o.create_table(config['table_name'], schema, if_not_exists=True)  # NOQA
    # Drop any earlier output for this day so the run can be repeated.
    t.delete_partition('ds=%s' % bizdate, if_exists=True)
    with t.open_writer(partition='ds=%s' % bizdate, create_partition=True) as writer:
        writer.write(list(zip(*columns_data)))
    return data


def generate_behavior_data(config, user_data, item_data):
    """Generate the behavior (event log) table for ``bizdate`` and write it.

    Steps:

    1. Draw the number of requests and, per request, a request id, a user id
       (weighted by user hot value), an experiment id and the per-request
       features.
    2. Repeat every request a random number of times (``config[EVENT]``) and
       draw an item id for each resulting row.
    3. For every experiment and event type, flag the rows on which the event
       happens (respecting 'depend') and emit one output row per flagged row.
    4. Fill the event value columns (e.g. playtime) on the matching rows.
    5. Create the table if needed and replace the ``ds=<bizdate>`` partition.

    Args:
        config: Behavior config (see BEHAV_CONFIG).
        user_data: Feature data returned by generate_dim_data for users.
        item_data: Feature data returned by generate_dim_data for items.

    Raises:
        ValueError: If the experiment probabilities do not sum to one.
    """
    # NOTE: unlike the other seeds, this one is not prefixed with
    # config['seed']; it is kept as-is so generated data stays unchanged.
    request_amount = random_distribution(config['request_amount'], seed=bizdate + 'total_amount')

    data = OrderedDict()
    # request_id
    req_id_config = {'random_type': 'uniform', 'max': 999999999, 'min': 900000000, 'dtype': int}
    data['request_id'] = {
        'value': random_distribution(req_id_config, size=request_amount, seed=config['seed'] + bizdate + 'request_id'),
        'dtype': int,
        'comment': '埋点ID/请求ID'
    }

    # user_id
    data[USER_ID] = {
        'value':
        random_distribution(config[USER_ID], size=request_amount, seed=config['seed'] + bizdate + USER_ID, **user_data),
        'dtype':
        user_data[USER_ID]['dtype'],
        'comment':
        config[USER_ID]['comment']
    }

    # exp_id: users are bucketed by the hash of their id, so a user always
    # lands in the same experiment.
    exp_id_probs = config[EXP_ID]['p']
    # Tolerance-based check: exact float equality rejects valid splits such
    # as ten times 0.1, and an assert would vanish under -O.
    if abs(sum(exp_id_probs) - 1) > 1e-9:
        raise ValueError('Probabilities of %s must sum to 1, got %s' % (EXP_ID, sum(exp_id_probs)))
    exp_id_bins = []
    for exp_id, exp_id_p in zip(config[EXP_ID]['choices'], exp_id_probs):
        exp_id_bins.extend([exp_id] * round(exp_id_p * 100))
    # Rounding may yield slightly more or fewer than 100 bins; index by the
    # real bin count so the lookup can never run past the end.
    bin_count = len(exp_id_bins)
    data[EXP_ID] = {
        'value': np.asarray([exp_id_bins[hash(user_id) % bin_count] for user_id in data[USER_ID]['value']]),
        'dtype': config[EXP_ID]['dtype'],
        # Use the experiment's own column comment (previously the user id's).
        'comment': config[EXP_ID]['comment']
    }

    for feature_name, feature_config in config['features'].items():
        data[feature_name] = {
            'value':
            random_distribution(
                feature_config, size=request_amount, seed=config['seed'] + bizdate + feature_name, bizdate=bizdate),
            'dtype':
            feature_config['dtype'],
            'comment':
            feature_config['comment']
        }

    # expand with event_counts: every request becomes event_counts[i] rows
    event_counts = random_distribution(config[EVENT], size=request_amount, seed=config['seed'] + bizdate + EVENT)
    total_rows = int(np.sum(event_counts))
    for k, v in data.items():
        data[k] = {'value': np.repeat(v['value'], event_counts), 'dtype': v['dtype'], 'comment': v['comment']}

    # choice each request with item_id
    data[ITEM_ID] = {
        'value':
        random_distribution(config[ITEM_ID], size=total_rows, seed=config['seed'] + bizdate + ITEM_ID, **item_data),
        'dtype':
        item_data[ITEM_ID]['dtype'],
        'comment':
        config[ITEM_ID]['comment']
    }

    # expand with different event types
    expanded_data = {}
    for i, exp_id in enumerate(config[EXP_ID]['choices']):
        event_flags = {}
        for event, event_config in config[EVENT]['event_types'].items():
            exp_vals_config = event_config['exp_vals'][i]
            if 'depend' not in event_config:
                # Independent events may occur on any row of this experiment.
                base_flag = np.asarray(data[EXP_ID]['value']) == exp_id
            else:
                # Dependent events only occur where their parent event occurred.
                base_flag = event_flags[event_config['depend'][EVENT]]
            event_flag = random_distribution(
                {
                    'random_type': 'choice_bool',
                    'p_true': exp_vals_config['rate'],
                    'p_disturb': exp_vals_config['rate_disturb']
                },
                size=total_rows,
                seed=config['seed'] + bizdate + event + exp_id) & base_flag
            event_flags[event] = event_flag
            for feature_name, feature_value in data.items():
                if feature_name not in expanded_data:
                    expanded_data[feature_name] = {
                        'value': [],
                        'dtype': feature_value['dtype'],
                        'comment': feature_value['comment']
                    }
                expanded_data[feature_name]['value'].extend(feature_value['value'][event_flag])
            if EVENT not in expanded_data:
                expanded_data[EVENT] = {'value': [], 'dtype': str, 'comment': config[EVENT]['comment']}
            expanded_data[EVENT]['value'].extend([event] * int(np.count_nonzero(event_flag)))

    # add event values like playtime
    for i, exp_id in enumerate(config[EXP_ID]['choices']):
        for event_v, event_v_config in config[EVENT]['event_values'].items():
            exp_vals_config = event_v_config['exp_vals'][i]
            base_flag = np.asarray(expanded_data[EXP_ID]['value']) == exp_id
            if 'depend' in event_v_config:
                # In-place product of boolean arrays acts as a logical AND.
                base_flag *= np.asarray(expanded_data[EVENT]['value']) == event_v_config['depend'][EVENT]
            if event_v not in expanded_data:
                expanded_data[event_v] = {
                    'value': np.zeros(len(base_flag)),
                    'dtype': exp_vals_config['dtype'],
                    'comment': event_v_config['comment']
                }
            # Rows outside this experiment/event keep their current value
            # because the sample is multiplied by the 0/1 mask.
            expanded_data[event_v]['value'] += random_distribution(
                exp_vals_config, size=len(base_flag), seed=config['seed'] + bizdate + event_v + exp_id) * base_flag

    columns = []
    columns_data = []
    for feature_name, feature_value in expanded_data.items():
        columns.append(
            Column(name=feature_name, type=DTYPE_MAP[feature_value['dtype']], comment=feature_value['comment']))
        columns_data.append(feature_value['value'])
    partitions = [Partition(name='ds', type='string')]
    schema = Schema(columns=columns, partitions=partitions)
    # The partition value is appended as the last field of every record.
    columns_data.append([bizdate] * len(columns_data[0]))

    t = o.create_table(config['table_name'], schema, if_not_exists=True)  # NOQA
    # Drop any earlier output for this day so the run can be repeated.
    t.delete_partition('ds=%s' % bizdate, if_exists=True)
    with t.open_writer(partition='ds=%s' % bizdate, create_partition=True) as writer:
        writer.write(list(zip(*columns_data)))


if __name__ == '__main__':
    # Users come first: items pick their authors from the generated users,
    # and behavior rows need both dimensions.
    print('generate user data ...')
    user_data = generate_dim_data(config=USER_CONFIG, dim_id=USER_ID)

    print('generate item data ...')
    item_data = generate_dim_data(config=ITEM_CONFIG, dim_id=ITEM_ID, infos=user_data)

    print('generate behavior data ...')
    generate_behavior_data(config=BEHAV_CONFIG, user_data=user_data, item_data=item_data)
