nft-deal-score/solana_model.py

import os
import re
import json
import time
import pickle
from textwrap import indent
import warnings
import requests
import numpy as np
import pandas as pd
import kutils as ku
import urllib.request
import tensorflow as tf
import snowflake.connector

from curses import meta
from copy import deepcopy
from datetime import datetime
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, RidgeCV, Lasso, Ridge
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV

os.chdir('/Users/kellenblumberg/git/nft-deal-score')
from utils import merge, clean_name

warnings.filterwarnings('ignore')

# 1-att (naked 1-att?)
# matching aesthetics
# type
# laser eyes
# vipers
# pirate hat
# sombrero
# cowboy hat
# admiral hat

###################################
#     Define Helper Functions     #
###################################
def standardize_df(df, cols, usedf=None, verbose=False):
	for c in cols:
		if type(usedf) != type(pd.DataFrame()):
			usedf = df
		mu = usedf[c].mean()
		sd = usedf[c].std()
		if verbose:
			print(c)
		if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0:
			# df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 )
			df['std_{}'.format(c)] = df[c]
		else:
			df['std_{}'.format(c)] = (df[c] - mu) / sd
	return(df)

def just_float(x):
	x = re.sub('[^\d\.]', '', str(x))
	return(float(x))

def calculate_percentages(df, cols=[]):
	add_pct = not 'pct' in df.columns
	if not len(cols):
		cols = df.columns
	if add_pct:
		df['pct'] = 1
	for c in cols:
		g = df[c].value_counts().reset_index()
		g.columns = [ c, 'N' ]
		col = '{}_pct'.format(c)
		g[col] = g.N / g.N.sum()
		df = df.merge( g[[ c, col ]] )
		if add_pct:
			df['pct'] = df.pct * df[col]
	return(df)

def get_sales(check_exclude = True, exclude=[]):

	s_df = pd.read_csv('./viz/nft_deal_score_sales.csv').rename(columns={'sale_date':'block_timestamp'})
	sorted(s_df.collection.unique())
	s_df['token_id'] = s_df.token_id.astype(str)
	s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x))
	# s_df['collection'] = s_df.collection_x.fillna(s_df.collection_y).fillna(s_df.collection).apply(lambda x: clean_name(x))
	s_df = s_df.drop_duplicates(subset=['token_id','collection','price'])
	s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])]
	s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ]
	s_df = s_df[[ 'collection','block_timestamp','token_id','price','tx_id' ]]
	for e in exclude:
		s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == str(e[1])) & (s_df.price == e[2]) )]
	s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ]

	# exclude wierd data points
	if not check_exclude:
		train_exclude = pd.read_csv('./data/train_exclude.csv')
		include = pd.read_csv('./data/include.csv')
		include['include'] = 1
		train_exclude['train_exclude'] = 1
		exclude = pd.read_csv('./data/exclude.csv')
		exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x))
		exclude['token_id'] = exclude.token_id.astype(str)
		include['token_id'] = include.token_id.astype(str)
		train_exclude['token_id'] = train_exclude.token_id.astype(str)
		s_df = s_df.merge(exclude, how='left')
		s_df = s_df.merge(include, how='left')
		s_df = s_df.merge(train_exclude, how='left')
		s_df = s_df[ - ((s_df.include.isnull()) & (s_df.exclude.notnull())) ]
		del s_df['exclude']
		del s_df['include']

	###########################
	#     Calculate Floor     #
	###########################
	s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') )
	s_df['timestamp'] = s_df.block_timestamp.astype(int)
	s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int)

	# lowest price in last 20 sales
	s_df = s_df.sort_values(['collection','block_timestamp'])
	s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
	s_df = s_df.sort_values(['collection','block_timestamp'])
	s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True)

	# exclude sales that are far below the existing floor
	s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ]

	# 10%ile of last 20 sales
	s_df = s_df.sort_values(['collection','block_timestamp'])
	s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
	s_df = s_df.sort_values(['collection','block_timestamp'])
	# s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.0525).reset_index(0,drop=True)
	s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.0525).reset_index(0,drop=True)
	s_df['sim'] = 0
	s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] )
	s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False)
	return(s_df)

def get_coefs(cols, coef):
	coefs = []
	for a, b in zip(cols, coef):
		coefs += [[a,b]]
	coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
	# coefs.to_csv('~/Downloads/{}_lin_coefs.csv'.format(collection), index=False)
	# coefs['tmp'] = coefs.col.apply(lambda x: 'nft_rank' in x )
	# coefs['mult'] = coefs.col.apply(lambda x: -1 if x == 'std_nft_rank' else 1 )
	def f(x):
		if x['col'] in ['std_nft_rank','collection_rank']:
			return(-1)
		pos = ['adj_nft_rank','is_top_','y_pred_','matching','naked_1_att']
		for p in pos:
			if p in x['col']:
				return(1)
		if x['coef'] >= 0:
			return(1)
		return(-1)
	coefs['mult'] = coefs.apply(lambda x: f(x), 1 )
	coefs['val'] = coefs.mult * coefs.coef
	coefs = coefs.sort_values('val', ascending=0)
	return(coefs)

def train_model(check_exclude=False, supplement_with_listings=True, use_saved_params=True):
	exclude = [
		( 'aurory', 2239, 3500 )
		, ( 'aurory', 1876, 789 )
		, ( 'aurory', 2712, 500 )
		, ( 'aurory', 5368, 500 )
		, ( 'aurory', 9239, 1700 )
		, ( 'BAYC', 3231, 267500 )
		, ( 'BAYC', 3485, 250000 )
		, ( 'BAYC', 4037, 150000 )
		, ( 'BAYC', 318, 5850 )
		, ( 'BAYC', 1159, 4000 )
		, ( 'BAYC', 6538, 2400 )
		, ( 'BAYC', 232, 1032.05895 )
		, ( 'BAYC', 6326, 800 )
		, ( 'BAYC', 6924, 666 )
		, ( 'BAYC', 9198, 500 )
		, ( 'BAYC', 3001, 500 )
		, ( 'BAYC', 3562, 430 )
	]
	s_df = get_sales(check_exclude, exclude)
	# s_df.groupby('collection').block_timestamp.max()
	# s_df[s_df.collection == 'BAYC'].sort_values('block_timestamp', ascending=0).head()[['token_id','block_timestamp','price']]
	# s_df[s_df.collection == 'MAYC'].sort_values('price', ascending=0).head()
	# s_df[s_df.collection == 'Solana Monkey Business'].to_csv('./tableau/data/smb_sales.csv', index=False)
	# s_df = s_df[-s_df.collection.isin(['BAYC','MAYC'])]
	# s_df[s_df.collection.isnull()]
	# s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'})
	# s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x))
	# s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])]
	# s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ]
	# s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]]
	# for e in exclude:
	#     s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )]
	# s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ]

	# # exclude wierd data points
	# if not check_exclude:
	#     exclude = pd.read_csv('./data/exclude.csv')
	#     exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x))
	#     s_df = s_df.merge(exclude, how='left')
	#     s_df = s_df[s_df.exclude.isnull()]
	#     del s_df['exclude']

	#########################
	#     Load Metadata     #
	#########################
	m_df = pd.read_csv('./data/metadata.csv')
	# sorted(m_df.collection.unique())
	# sorted(s_df.collection.unique())
	tmp = m_df[['collection','feature_name']].drop_duplicates().groupby('collection').feature_name.count().reset_index().sort_values('feature_name')
	tmp.to_csv('~/Downloads/tmp-2.csv', index=False)
	cs = sorted(s_df.collection.unique())
	m_df.head()
	cs
	m_df = m_df[m_df.collection.isin(cs)]
	sorted(m_df.collection.unique())
	m_df['feature_name'] = m_df.feature_name.apply(lambda x: 'Clothes' if x == 'Clother' else x )
	# m_df[m_df.collection == 'DeGods'][['feature_name']].drop_duplicates()
	# sorted(m_df.collection.unique())
	# m_df[m_df.collection == 'Aurory'][['collection','feature_name']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
	# sorted([x for x in m_df.feature_name.unique() if 'nft_' in x])
	m_df['token_id'] = m_df.token_id.astype(str)
	m_df['collection'] = m_df.collection.apply(lambda x: clean_name(x))
	sorted(m_df.collection.unique())
	# remove ones that are not actually metadata
	m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ]
	m_df['feature_value'] = m_df.feature_value.apply(lambda x: re.split("\(", re.sub("\"", "", x))[0] if type(x)==str else x )
	m_df[(m_df.feature_name=='rank') & (m_df.collection == 'Levana Dragon Eggs')]
	sorted(m_df[ (m_df.collection == 'Solana Monkey Business') ].feature_name.unique())
	sorted(m_df.collection.unique())


	#####################################
	#     Exclude Special LunaBulls     #
	#####################################
	tokens = pd.read_csv('./data/tokens.csv')
	tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x))
	tokens['token_id'] = tokens.token_id.astype(str)
	m_df = merge(m_df, tokens[['collection','token_id','clean_token_id']].dropna().drop_duplicates() , how='left', ensure=True, on=['collection','token_id'], message='m_df x tokens')
	m_df['token_id'] = m_df.clean_token_id.fillna(m_df.token_id).astype(float).astype(int).astype(str)
	s_df = merge(s_df, tokens[['collection','token_id','clean_token_id']].drop_duplicates(), how='left', ensure=True, on=['collection','token_id'], message='s_df x tokens')
	# s_df[s_df.token_id.isnull()]
	# sorted(s_df.collection.unique())
	# np.isinf(s_df).values.sum()
	# s_df['clean_token_id'] = s_df.clean_token_id.fillna(s_df.token_id)
	# s_df['token_id'] = (s_df.clean_token_id).apply(lambda x: re.sub('"', '', str(x))).astype(float).astype(int).astype(str)
	s_df['token_id'] = (s_df.clean_token_id.replace('nan', None).fillna(s_df.token_id.replace('nan', None))).apply(lambda x: re.sub('"', '', str(x)))
	s_df = s_df[s_df.token_id != 'None']
	s_df['token_id'] = s_df.token_id.astype(float).astype(int).astype(str)
	# s_df[s_df.token_id == 'None'].groupby('collection').token_id.count()


	lunabullsrem = tokens[tokens.clean_token_id>=10000].token_id.unique()
	m_df = m_df[ -((m_df.collection == 'LunaBulls') & (m_df.token_id.isin(lunabullsrem))) ]
	s_df = s_df[ -((s_df.collection == 'LunaBulls') & (s_df.token_id.isin(lunabullsrem))) ]
	s_df = s_df.drop_duplicates(subset=['collection','token_id','price'])


	###########################
	#     Calculate Floor     #
	###########################
	# s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') )
	# s_df['timestamp'] = s_df.block_timestamp.astype(int)
	# s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int)

	# # lowest price in last 20 sales
	# s_df = s_df.sort_values(['collection','block_timestamp'])
	# s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
	# s_df = s_df.sort_values(['collection','block_timestamp'])
	# s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True)

	# # exclude sales that are far below the existing floor
	# s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ]

	# # 10%ile of last 20 sales
	# s_df = s_df.sort_values(['collection','block_timestamp'])
	# s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
	# s_df = s_df.sort_values(['collection','block_timestamp'])
	# s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True)
	# s_df['sim'] = 0
	# s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] )
	# s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False)

	listings = pd.read_csv('./data/listings.csv')
	listings = pd.read_csv('./viz/nft_deal_score_listings.csv')
	sorted(listings.collection.unique())
	if supplement_with_listings:
		pred_price = pd.read_csv('./data/pred_price.csv')
		pred_price['collection'] = pred_price.collection.apply(lambda x: clean_name(x))
		pred_price['token_id'] = pred_price.token_id.astype(str)
		listings['collection'] = listings.collection.apply(lambda x: clean_name(x))
		listings['block_timestamp'] = s_df.block_timestamp.max()
		listings['token_id'] = listings.token_id.astype(str)
		# listings = listings[listings.collection.isin(pred_price.collection.unique())]
		floor = s_df.sort_values('timestamp').groupby('collection').tail(1)[['collection','mn_20']]
		tmp = merge(listings, pred_price, ensure=False)
		tmp = tmp[ (tmp.price < tmp.pred_price) | ((tmp.collection == 'Galactic Angels') & (tmp.pred_price >= 20) & (tmp.price < (tmp.pred_price * 2))) ]
		tmp_1 = tmp[ (tmp.price * 1.25) < tmp.pred_price ]
		tmp_2 = tmp[ (tmp.price * 1.50) < tmp.pred_price ]
		tmp = tmp.append(tmp_1).append(tmp_2)
		# tmp[tmp.block_timestamp.isnull()]
		# tmp.block_timestamp = s_df.timestamp.max()
		tmp['timestamp'] = tmp.block_timestamp.astype(int)
		tmp['days_ago'] = tmp.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int)
		tmp = merge(tmp, floor, ensure=False)

		n = round(len(s_df) / 5000)
		n = max(1, min(3, n))
		print('Supplement with {}x listings'.format(n))
		# n = 1
		for _ in range(n):
			s_df = s_df.append(tmp[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]])
			# tmp_1 = tmp[tmp.price <= 0.8 * tmp.pred_price]
			# s_df = s_df.append(tmp_1[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]])
			# tmp_2 = tmp[tmp.price <= 0.6 * tmp.pred_price]
			# tmp_2 = s_df.append(tmp_2[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]])


	########################
	#     Other Things     #
	########################
	# coefsdf = pd.DataFrame()
	# salesdf = pd.DataFrame()
	# attributes = pd.DataFrame()
	# pred_price = pd.DataFrame()
	# feature_values = pd.DataFrame()
	coefsdf = pd.read_csv('./data/coefsdf.csv')
	salesdf = pd.read_csv('./data/model_sales.csv')
	attributes = pd.read_csv('./data/attributes.csv')
	pred_price = pd.read_csv('./data/pred_price.csv')
	pred_price[ (pred_price.collection == 'Solana Monkey Business') & (pred_price.token_id == 1141)]
	feature_values = pd.read_csv('./data/feature_values.csv')
	# non-binary in model: collection_rank, temperature, weight
	# non-binary in model; exclude from rarity: pct, rank, score
	# exclude from model: lucky_number, shower
	# exclude from model and rarity %: meteor_id, attribute_count, cracking_date
	ALL_NUMERIC_COLS = ['rank','score','pct','Pct']
	ALL_NUMERIC_COLS = ['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2']
	MODEL_EXCLUDE_COLS = {
		# 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
		'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count','rarity_score_rank','rarity_score','weight','collection_rank_group']
		, 'Solana Monkey Business': ['Clothes_Diamond']
	}
	MODEL_INCLUDE_COLS = {
		# 'Solana Monkey Business': ['std_Hat_Strawhat','std_Hat_Space Warrior Hair','std_Clothes_Diamond','std_Eyes_Solana Vipers','std_Eyes_Vipers','std_Hat_Sombrero','std_Eyes_3D Glasses','std_Hat_Cowboy Hat','std_Eyes_Laser Eyes','std_matching_cop','std_matching_white','std_matching_black']
		'Solana Monkey Business': ['std_Hat_Space Warrior Hair','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_Sombrero','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Eyes_Laser Eyes','std_Type_Solana']
	}
	RARITY_EXCLUDE_COLS = {
		# 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
		'Levana Dragon Eggs': ['meteor_id','attribute_count','collection_rank','transformed_collection_rank','rarity_score','rarity_score_rank','collection_rank_group']
	}
	NUMERIC_COLS = {
		'Levana Dragon Eggs': ['collection_rank','temperature','transformed_collection_rank']
	}
	ATT_EXCLUDE_COLS = {
		'Levana Dragon Eggs': ['attribute_count','transformed_collection_rank','collection_rank_group']
	}

	collection = 'Aurory'
	collection = 'Solana Monkey Business'
	collection = 'DeGods'
	# for collection in [ 'Solana Monkey Business' ]:
	# for collection in [ 'Aurory' ]:
	# for collection in [ 'Aurory','Solana Monkey Business' ]:
	sorted(pred_price.collection.unique())
	sorted(s_df.collection.unique())
	# print(sorted(m_df.collection.unique()))
	# for collection in s_df.collection.unique():
	saved_params = {}
	file_to_store = open('./objects/saved_params.pickle', 'rb')
	saved_params = pickle.load(file_to_store)
	collection = 'Aurory'
	collection = 'Levana Dragon Eggs'
	collection = 'Galactic Punks'
	collection = 'Stoned Ape Crew'
	collections = ['Levana Dragon Eggs']
	collections = ['Solana Monkey Business']
	collections = ['Galactic Angels']
	# collections = [ x for x in collections if not x in ['BAYC','MAYC','Bakc'] ]
	collection = 'BAYC'
	collection = 'MAYC'
	collections = ['BAYC']
	collections = ['MAYC']
	collections = [ x for x in collections if not x in ['Bakc','BAKC','MAYC'] ]
	collections = [ x for x in collections if not x in ['Astrals','Cets on Cleck','DeFi Pirates'] ]
	s_df.groupby('collection').block_timestamp.max()
	m_df[m_df.collection.isin(['Okay Bears','Catalina Whale Mixer'])]
	s_df[s_df.collection.isin(['Okay Bears','Catalina Whale Mixer'])]
	collections = ['Okay Bears']
	collections = ['Cets on Creck']
	collections = [ c for c in collections if not c in [ 'Galactic Punks','LunaBulls','Galactic Angels','Levana Dragon Eggs','BAKC','BAYC','Astrals','MAYC' ] ]
	collections = [ c for c in collections if not c in [ 'Okay Bears','Stoned Ape Crew','Cets on Creck' ] ]
	collections = ['SOLGods']
	collections = ['Cets on Creck','Pesky Penguins']
	collections = ['Just Ape.','Bubblegoose Ballers']
	collections = ['Bubblegoose Ballers']
	print(sorted(collections))
	salesdf.token_id.values[:3]
	salesdf['token_id'] = salesdf.token_id.astype(int)
	listings['token_id'] = listings.token_id.astype(int)
	sorted(m_df.collection.unique())
	sorted(pred_price.collection.unique())
	rem = m_df[['collection','feature_name']].drop_duplicates().groupby('collection').feature_name.count().reset_index().sort_values('feature_name')
	rem = rem[rem.feature_name <= 5].collection.unique()
	collections = list(s_df[['collection']].drop_duplicates().merge(m_df[['collection']].drop_duplicates()).collection.unique())
	collections = [ c for c in collections if not c in rem ]

	# DeGods
	m_df[m_df.collection=='Astrals']
	rem = sorted(pred_price.collection.unique())
	collections = [ c for c in collections if not c in rem ]
	rem = [ 'Levana Dragon Eggs','LunaBulls' ]
	collections = [ c for c in collections if not c in rem ]
	# collections = [ 'Catalina Whale Mixer', 'Okay Bears', 'Pesky Penguins' ]
	for i in range(2):
		for collection in collections[1:]:
			# try:
			# if collection in ['Astrals','Bakc','BAKC']+[ 'Catalina Whale Mixer', 'Okay Bears', 'Pesky Penguins' ]:
			if collection in ['Bakc','BAKC','Levana Dragon Eggs','LunaBulls']:
				continue
			# if collection in ['DeGods']:
			# 	continue
			if not collection in saved_params.keys():
				saved_params[collection] = {}
			coefsdf = coefsdf[coefsdf.collection != collection]
			salesdf = salesdf[salesdf.collection != collection]
			attributes = attributes[attributes.collection != collection]
			pred_price = pred_price[pred_price.collection != collection]
			feature_values = feature_values[feature_values.collection != collection]
			print('\nWorking on collection {}'.format(collection))
			sales = s_df[ s_df.collection == collection ]
			sales[sales.sim==0].block_timestamp.max()
			metadata = m_df[ m_df.collection == collection ].drop_duplicates(subset=['token_id','feature_name'], keep='last')
			metadata.feature_name.unique()
			metadata = metadata[metadata.feature_name != 'Genesis Role?']
			if collection == 'Degen Dojo':
				metadata = metadata[metadata.feature_name != 'Encrypted Traits']
			metadata[metadata.token_id=='1']
			metadata[metadata.feature_name=='Genesis Role?'].feature_value.unique()
			sorted(metadata.feature_name.unique())
			# metadata.groupby(['feature_name','feature_value']).token_id.count().reset_index().to_csv('~/Downloads/tmp.csv', index=False)
			# metadata[metadata.token_id == '1']
			metadata['feature_name'] = metadata.feature_name.apply(lambda x: x.strip() )
			metadata[metadata.token_id == '1']
			metadata[metadata.feature_name == 'rank']
			metadata = metadata[-metadata.feature_name.isin(['rank','pct','Pct','ipfs_image'])]
			metadata.feature_name.unique()
			metadata[(metadata.token_id=='1') & (metadata.collection == 'Solana Monkey Business')]

			# categorize columns
			all_names = sorted(metadata.feature_name.unique())
			model_exclude = MODEL_EXCLUDE_COLS[collection] if collection in MODEL_EXCLUDE_COLS.keys() else []
			num_features = sorted((NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []) + ALL_NUMERIC_COLS)
			num_features = [ x for x in num_features if x in metadata.feature_name.unique() ]
			num_metadata = metadata[metadata.feature_name.isin(num_features)]
			num_metadata[num_metadata.feature_name == 'nft_rank']
			cat_features = sorted([ x for x in all_names if not x in (model_exclude + num_features) ])
			cat_metadata = metadata[metadata.feature_name.isin(cat_features)]

			# create dummies for binary variables
			num_metadata = num_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
			num_metadata.columns = [ 'collection','token_id' ] + num_features

			# create dummies for binary variables
			cat_metadata = cat_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
			cat_metadata.columns = [ 'collection','token_id' ] + cat_features
			# cat_metadata = calculate_percentages( cat_metadata, cat_features )
			dummies = pd.get_dummies(cat_metadata[cat_features])
			# dummies.head(1).to_csv('~/Downloads/tmp2.csv', index=False)
			if collection == 'Solana Monkey Business':
				# dummies['matching_white'] = ((dummies['Clothes_Beige Smoking'] == 1) & ((dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2']) == 1)).astype(int)
				# dummies['matching_black'] = ((dummies['Clothes_Black Smoking'] == 1) & ((dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_Black Top Hat']) == 1)).astype(int)
				# dummies['matching_top'] = ((dummies['matching_black'] == 1) | (dummies['matching_white']== 1)).astype(int)
				# dummies['matching_cop'] = ((dummies['Clothes_Cop Vest'] == 1) & ((dummies['Hat_Cop Hat']==1))).astype(int)
				# dummies['matching_green'] = ((dummies['Clothes_Green Smoking'] == 1) & ((dummies['Hat_Green Top Hat']) == 1)).astype(int)
				dummies['naked_1_att'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Clothes_None'] == 1)).astype(int)
				# dummies['naked_1_att_hat'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Hat_None'] == 0)).astype(int)
				dummies['fedora'] = (dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2'] + dummies['Hat_White Fedora 2'] >= 1 ).astype(int)
				dummies['backwards_cap'] = (dummies['Hat_Black Backwards Cap'] + dummies['Hat_Blue Backwards Cap'] + dummies['Hat_Green Backwards Cap'] + dummies['Hat_Orange Backwards Cap'] + dummies['Hat_Purple Backwards Cap'] + dummies['Hat_Solana Backwards Cap'] >= 1 ).astype(int)
				# del dummies['matching_white']
				# del dummies['matching_black']

			cat_metadata = pd.concat([ cat_metadata.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
			# del cat_metadata['pct']

			for c in model_exclude:
				if c in dummies.columns:
					del dummies[c]
			pred_cols = num_features + list(dummies.columns)
			pred_cols = [ c for c in pred_cols if not c in model_exclude+['Matching_No'] ]

			if len(sales) < 1000:
				pred_cols = [ x for x in pred_cols if 'rank' in x or 'is_top_' in x ]

			# create training df
			sales['token_id'] = sales.token_id.astype(str)
			num_metadata['token_id'] = num_metadata.token_id.astype(str)
			df = merge(sales, num_metadata, ['collection','token_id'], ensure=False)
			df = merge(df, cat_metadata, ['collection','token_id'], ensure=False)
			assert(len(df.columns) < 1000)
			list(df.columns)[100:]
			list(df.columns)[-150:]

			# test dataFrame
			ensure = not collection in ['Aurory','Stoned Ape Crew']
			test = merge(num_metadata, cat_metadata, ['collection','token_id'], ensure=False)

			# if collection == 'Solana Monkey Business':
			# 	hat = metadata[ metadata.feature_name == 'Hat' ]
			# 	hat['color'] = hat.feature_value.apply(lambda x: re.split(' ', x)[0] )
			# 	clothes = metadata[ metadata.feature_name == 'Clothes' ]
			# 	clothes['color'] = clothes.feature_value.apply(lambda x: re.split(' ', x)[0] )
			# 	matching = hat[['token_id','color']].merge(clothes[['token_id','color']])
			# 	app = cat_metadata[ (dummies.matching_top == 1) | (dummies.matching_cop == 1) ][['token_id']]
			# 	matching = matching[['token_id']].append(app[['token_id']]).drop_duplicates()
			# 	matching['matching'] = 1
			# 	del dummies['matching_cop']
			# 	del dummies['matching_top']
			# 	# dummies = merge(dummies, matching, on=['token_id'], how='left').fillna(0)
			# 	df = merge(df, matching, on=['token_id'], how='left').fillna(0)
			# 	test = merge(test, matching, on=['token_id'], how='left').fillna(0)
			# 	pred_cols.append('matching')

			for c in num_features:
				df[c] = df[c].apply(lambda x: just_float(x))
				test[c] = test[c].apply(lambda x: just_float(x) )

			#################################
			#     Create Test DataFrame     #
			#################################
			tail = df.sort_values('timestamp').tail(1)
			if collection == 'Solana Monkey Business':
				test.loc[ test.token_id == '903', 'nft_rank' ] = 18
			for c in [ 'std_timestamp','mn_20','log_mn_20' ]:
				if c in tail.columns:
					test[c] = tail[c].values[0]

			for tmp in [df, test]:
				for i in [100, 250, 1000]:
					if collection in ['Levana Dragon Eggs']:
						tmp['is_top_{}'.format(i)] = (tmp.collection_rank <= i).astype(int)
					else:
						tmp['is_top_{}'.format(i)] = (tmp.nft_rank <= i).astype(int)
			pred_cols += [ 'is_top_100','is_top_250','is_top_1000' ]
			if 'collection_rank' in pred_cols:
				pred_cols = [ x for x in pred_cols if not x in ['nft_rank'] ]
			df.sort_values('price', ascending=0)[['price']].head(20)
			# df.groupby(['rarity','weight']).price.mean()

			# create target cols
			target_col = 'adj_price'
			df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 )
			# df['mn_20'] = df.mn_20 * 1.01
			df = df[df[target_col].notnull()]
			df['log_price'] = df[target_col].apply(lambda x: np.log(x) )
			df['rel_price_0'] = df[target_col] - df.mn_20
			df['rel_price_1'] = df[target_col] / df.mn_20
			df = df[df.mn_20 > 0]
			df['log_mn_20'] = np.log(df.mn_20)
			print('Training on {} sales'.format(len(df)))
			df = standardize_df(df, pred_cols)
			test = standardize_df(test, pred_cols, df)

			std_pred_cols_0 = [ 'std_{}'.format(c) for c in pred_cols ]
			std_pred_cols = [ 'std_{}'.format(c) for c in pred_cols ]
			df.sort_values('rel_price_0', ascending=0).head()[['token_id','nft_rank','price','rel_price_0']]

			#########################
			#     Run the Model     #
			#########################
			tmp = df[std_pred_cols].count().reset_index()
			tmp.columns = ['a','b']
			tmp.sort_values('b').head(20)
			rem = list(tmp[tmp.b==0].a.values)
			std_pred_cols = [ c for c in std_pred_cols if not c in rem ]
			# if collection == 'Levana Dragon Eggs':
			# 	std_pred_cols = [ 'std_essence_Dark','std_collection_rank_group_0','std_rarity_Legendary','std_rarity_Rare','std_rarity_Ancient','std_collection_rank','std_transformed_collection_rank' ]
			mn = df.timestamp.min()
			mx = df.timestamp.max()
			a_week_ago = (time.time() * 1000000000) - (60 * 60 * 24 * 7 * 1000000000)
			df['wt'] = df.timestamp.apply(lambda x: 4.0 ** ((x - mn) / (mx - mn)) )
			df.loc[ df.timestamp >= a_week_ago, 'wt' ] = 5
			df['wt'] = df.apply(lambda x: 0 if (x['train_exclude']==1 and (x['train_exclude_price'] != x['train_exclude_price'] or x['train_exclude_price'] == x['price'])) else x['wt'], 1 )
			df.loc[ (df.collection == 'Aurory') & (df.block_timestamp <= '2021-09-05'), 'wt' ] = 0.05
			if collection == 'Levana Dragon Eggs':
				df['wt'] = 1
			#     df['wt'] = df.price.apply(lambda x: 1.0 / (x ** 0.9) )
			#     df.sort_values('price', ascending=0)[['price','wt']].head(20)
			# std_pred_cols = [ 'std_Hat_Crown','std_adj_nft_rank_0','std_Hat_None','std_Eyes_None','std_Clothes_None','std_Attribute Count_4','std_Mouth_None','std_adj_nft_rank_1','std_Type_Dark','std_Ears_None','std_Background_Light purple','std_Hat_Black Fedora 2','std_Hat_White Fedora 2','std_Attribute Count_0','std_Type_Skeleton','std_Attribute Count_2','std_Attribute Count_1','std_Hat_Protagonist Black Hat','std_Clothes_Sailor Vest','std_Mouth_Pipe','std_Hat_Protagonist White Hat','std_Clothes_Pirate Vest','std_Hat_Roman Helmet','std_Type_Solana','std_Clothes_Beige Smoking','std_Hat_Military Helmet','std_Hat_White Fedora 1','std_naked_1_att','std_Type_Zombie','std_Clothes_Roman Armor','std_Eyes_3D Glasses','std_Clothes_Orange Kimono','std_Hat_Green Punk Hair','std_Hat_Sombrero','std_Clothes_Military Vest','std_Hat_Space Warrior Hair','std_Hat_Blue Punk Hair','std_Clothes_Orange Jacket','std_Ears_Earing Silver','std_Eyes_Laser Eyes','std_Eyes_Vipers','std_Type_Alien','std_Type_Red','std_Hat_Admiral Hat' ]
			# cur_std_pred_cols = [ 'std_adj_nft_rank_0','std_Hat_Crown','std_adj_nft_rank_1','std_Type_Skeleton','std_Type_Alien','std_Clothes_None','std_Eyes_Vipers','std_Hat_Space Warrior Hair','std_Type_Zombie','std_Clothes_Pirate Vest','std_Clothes_Orange Kimono','std_Eyes_Laser Eyes','std_Type_Solana','std_Hat_Ninja Bandana','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Attribute Count_0','std_Attribute Count_1','std_Attribute Count_2','std_Attribute Count_3','std_Attribute Count_5','std_Hat_Strawhat','std_Hat_Admiral Hat','std_matching_top','std_Hat_Sombrero','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_None' ]
			# cur_std_pred_cols = deepcopy(std_pred_cols)
			# g = df[std_pred_cols].sum().reset_index()
			# g.columns = [ 'col','cnt' ]
			# g = g.sort_values('cnt')
			# g.head(20)
			if collection == 'Solana Monkey Busines':
				df.loc[ df.token_id == '903', 'nft_rank' ] = 18
				df[df.token_id=='903']
				df[df.token_id==903]

			# CUR_FLOOR = df.sort_values('block_timestamp', ascending=0).mn_20.values[0]
			CUR_FLOOR = listings[(listings.collection == collection) & (listings.price.notnull())].price.min()
			print('CUR_FLOOR = {}'.format(CUR_FLOOR))
			if not CUR_FLOOR:
				continue
			df['tmp'] = df.nft_rank.apply(lambda x: int(x / 1000) )
			df.groupby('tmp').rel_price_0.mean()
			df.groupby('tmp').rel_price_0.median()
			df.groupby('tmp').rel_price_1.median()
			df.groupby('tmp').rel_price_1.mean()
			df[[ 'nft_rank','rel_price_0' ]].to_csv('~/Downloads/tmp.csv')
			if collection == 'MAYC':
				df = df[-((df.rel_price_0 >= 100) & (df.nft_rank > 1000))]
				df = df[(df.mn_20 >= 1)]
			if collection == 'BAYC':
				df = df[(df.mn_20 >= 10)]
			if collection == 'MAYC':
				df = df[(df.mn_20 >= 2)]
			df.sort_values('price', ascending=0).head(30)[['token_id','price','rel_price_0','rel_price_1','nft_rank','block_timestamp']]
			df.sort_values('rel_price_1', ascending=0).head(30)[['token_id','price','rel_price_0','rel_price_1','nft_rank','block_timestamp']]
			df.sort_values('rel_price_1', ascending=0).head(100)[['token_id','mn_20','price','rel_price_0','rel_price_1','nft_rank','block_timestamp']].to_csv('~/Downloads/tmp.csv', index=False)

			df = df.reset_index(drop=True)
			X = df[std_pred_cols].values
			wt = df['wt'].values
			y_0 = df.rel_price_0.values
			y_1 = df.rel_price_1.values
			df.sort_values('price', ascending=0).head(15)[['price','token_id','nft_rank','block_timestamp']]
			df[df.sim == 0].block_timestamp.max()

			for target_col in [ 'rel_price_0', 'rel_price_1' ]:
				it = target_col[-1]
				y_val = df[target_col].values
				print('target_col = {}'.format(target_col))
				mn = -1
				cols = [ 'std_nft_rank','std_adj_nft_rank_0','std_adj_nft_rank_1','std_adj_nft_rank_2' ]
				clf = Ridge(alpha = 1)
				# while mn < 0 and len(cols):
				# 	clf.fit(df[cols].values, y_val, df.wt.values)
				# 	coefs = get_coefs(cols, clf.coef_)
				# 	mn = min(coefs.val) if len(coefs) else 0
				# 	if mn < 0:
				# 		cols.remove(coefs.col.values[-1])

				col = 'rarity_value_'+it
				model = 'ridge'
				df[col] = 0
				test[col] = 0
				# df, bst_p, bst_r = ku.get_bst_params( model, df, df[cols].values, y_val, target_col, col, verbose = True, wt_col='wt'  )
				# test = ku.apply_model( model, bst_p, df, test, cols, target_col, col)

				# df['rarity_value_'+it] = clf.predict(df[cols].values)
				rar_adj_target_col = 'rar_adj_'+target_col
				df[rar_adj_target_col] = df[target_col] - df['rarity_value_'+it]
				# test[rar_adj_target_col] = test[target_col] - test['rarity_value_'+it]
				y_val_rar_adj = df[rar_adj_target_col].values
				models = ['las','ridge'] if target_col == 'rel_price_1' or len(sales) < 1000 else ['las','ridge','rfr']
				for model in models:
					cur_std_pred_cols = deepcopy(std_pred_cols)
					print(model)
					y = y_val_rar_adj if model in ['rfr'] else y_val
					col = 'y_pred_{}_{}'.format(model, it)
					params = [saved_params[collection][col]] if col in saved_params[collection].keys() and use_saved_params else []
					df, bst_p, bst_r = ku.get_bst_params( model, df, X, y, target_col, col, verbose = True, wt_col='wt', params = params )
					saved_params[collection][col] = bst_p

					# if model == 'ridge':
					# 	while len(cur_std_pred_cols) > 50:
					# 		coefs = get_coefs(cur_std_pred_cols, clf.coef_)
					# 		cur_std_pred_cols.remove(coefs.col.values[-1])
					# 		new_X = df[cur_std_pred_cols].values
					# 		clf = ku.get_model(model, bst_p)
					# 		clf.fit(new_X, y)
					# 		# coefs.to_csv('./data/coefs/{}_{}_{}.csv'.format(collection, model, it))
					# 	new_X = df[cur_std_pred_cols].values
					# 	df, bst_p, bst_r = ku.get_bst_params( model, df, new_X, y, target_col, col, verbose = True, wt_col='wt' )

					if model in ['las','ridge']:
						clf = ku.get_model(model, bst_p)
						clf.fit(X, y, wt)
						coefs = get_coefs(cur_std_pred_cols, clf.coef_)
						mn = coefs.val.min()
						while mn < 0:
							cur_std_pred_cols = [ c for c in coefs[coefs.val >= 0 ].col.unique() ]
							X_new = df[cur_std_pred_cols].values
							clf.fit(X_new, y, wt)
							coefs = get_coefs(cur_std_pred_cols, clf.coef_)
							mn = coefs.val.min()
							if mn >= 0:
								df, bst_p, bst_r = ku.get_bst_params( model, df, X_new, y, target_col, col, verbose = True, wt_col='wt', params = [bst_p] )
						coefs['col'] = coefs.col.apply(lambda x: re.sub('std_', '', x) )
						coefs['n'] = 0
						n = pd.DataFrame()
						for c in cat_metadata.columns:
							if not c in [ 'collection','token_id' ]:
								coefs.loc[ coefs.col == c, 'n' ] = len(cat_metadata[cat_metadata[c] == 1])
						coefs.to_csv('./data/coefs/{}_{}_{}.csv'.format(collection, model, it), index=False)

					test = ku.apply_model( model, bst_p, df, test, cur_std_pred_cols, target_col, col)
					if model in ['rfr']:
						df[col] = df[col] + df['rarity_value_'+it]
						test[col] = test[col] + test['rarity_value_'+it]

				mn = -1
				cols = [ c for c in df.columns if c[:7] == 'y_pred_' and c[-1] == it ]
				clf = LinearRegression()
				df[cols].mean()
				df[cols].median()
				test[cols].mean()
				test[cols].median()
				while mn < 0 and len(cols):
					clf.fit(df[cols].values, df[target_col].values)
					coefs = get_coefs(cols, clf.coef_)
					mn = min(coefs.val) if len(coefs) else 0
					if mn < 0:
						cols.remove(coefs.col.values[-1])
					else:
						print(coefs)
				if it == '0':
					df['pred_lin'] = clf.predict(df[cols].values) + df.mn_20
					test['pred_lin'] = clf.predict(test[cols].values) + CUR_FLOOR
					# df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20
				else:
					df['pred_log'] = clf.predict(df[cols].values)
					df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
					test['pred_log'] = clf.predict(test[cols].values)
					test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * CUR_FLOOR

			clf = LinearRegression(fit_intercept=False)
			target_col = 'adj_price'
			clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values )
			score = clf.score( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values )
			tmp = df[['token_id','block_timestamp','wt','mn_20','pred_lin','pred_log','price','nft_rank']]
			tmp['block_timestamp'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] )
			tmp['err_0'] = tmp.pred_lin - tmp.price
			tmp['err_1'] = tmp.pred_log / tmp.price
			tmp.to_csv('~/Downloads/tmp.csv', index=False)
			print('R-Sq: {}'.format(round(score * 100, 1)))
			# df[['pred_lin','pred_log',target_col]].mean()
			# df[['pred_lin','pred_log',target_col]].median()
			# test[['pred_lin','pred_log']].mean()
			# test[['pred_lin','pred_log']].median()

			print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) ))
			tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], CUR_FLOOR]], columns=['collection','lin_coef','log_coef','floor_price'])
			if clf.coef_[0] < 0:
				print('Only using log')
				df['pred'] = df.pred_log
				test['pred'] = test.pred_log
				tmp['lin_coef'] = 0
				tmp['log_coef'] = 1
			elif clf.coef_[1] < 0:
				print('Only using lin')
				df['pred'] = df.pred_lin
				test['pred'] = test.pred_lin
				tmp['lin_coef'] = 1
				tmp['log_coef'] = 0
			else:
				print('Only using BOTH!')
				df['pred'] = clf.predict( df[['pred_lin','pred_log']].values )
				test['pred'] = clf.predict( test[['pred_lin','pred_log']].values )
			coefsdf = coefsdf.append(tmp)
			df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) )
			df['err'] = df[target_col] - df.pred
			df.head()
			# df[df['std_Attribute count_4']==1]['err']
			df['w_err'] = df.err * df.wt
			# df[df['std_Attribute count_4']==1].sort_values('timestamp')[['err','w_err']].mean()
			# df[(df['std_Attribute count_4']==1)].sort_values('timestamp')[['err','w_err']].mean()
			# df[(df['std_Attribute count_4']==1) & (df.wt>=15)].sort_values('timestamp')[['err','w_err']].mean()
			# df[(df['std_Attribute count_4']==1) & (df.wt>=15)].sort_values('timestamp')[['err','w_err']].sum()
			# df[(df['std_Attribute count_4']==1) & (df.wt<15)].sort_values('timestamp')[['err','w_err']].mean()
			# df[(df['std_Attribute count_4']==1) & (df.wt<15)].sort_values('timestamp')[['err','w_err']].sum()
			# df[df['std_Attribute count_4']==1].sort_values('timestamp')[['err','price','pred','block_timestamp']].tail(20)
			# df[(df['std_Attribute count_4']==1) & (df.wt>=1)].sort_values('timestamp')[['err','price','pred','block_timestamp','wt','w_err']].tail(50)
			# df[df.wt >= 15].wt.sum() / df.wt.sum()
			# df[df.wt < 15].wt.sum()

			recent_errs = []
			recent = df[df.timestamp >= a_week_ago]
			for c in [ c for c in cur_std_pred_cols if len(df[c].unique()) == 2]:
				a = recent[recent[c] == 1]
				recent_errs += [[ c, len(a), a.err.mean(), a.err.sum(), a.err.sum() / recent.price.sum() ]]
			recent_errs = pd.DataFrame(recent_errs, columns=['col','n','avg','tot','rat']).sort_values('tot')
			recent_errs['abs_rat'] = abs(recent_errs.rat)
			recent_errs.sort_values('rat')
			correct = recent_errs[(recent_errs.abs_rat > 0.003) & (recent_errs.n >= 10)]
			if len(correct):
				mx = max(0.001, correct.abs_rat.max())
				correct['chg'] = (correct.abs_rat / mx).apply(lambda x: min(1, x) * .7 ) * correct.avg
				correct['abs_chg'] = abs(correct.chg)
				print(correct.sort_values('chg'))
			for row in correct.iterrows():
				row = row[1]
				c = row['col']
				test['pred'] = test.apply(lambda x: x['pred'] if x[c] == 0 else x['pred'] + row['chg'], 1 )

			# print out some summary stats
			df['err'] = df[target_col] - df.pred
			df['q'] = (df.pred.rank() ** 1.5 * .2) / len(df)
			df['q'] = df.q.apply(lambda x: int(round(x)) )
			df['pct_err'] = (df[target_col] / df.pred) - 1
			pe_mu = df.pct_err.mean()
			pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) & (df.days_ago<=50) ].pct_err.std()
			if pe_sd != pe_sd:
				pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) ].pct_err.std()
			df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) )
			if collection == 'Levana Dragon Eggs':
				df['pred_price'] = df.pred.apply(lambda x: x*1.01 )
			df['pred_sd'] = df.pred * pe_sd
			# print(df[df.wt >= df.wt.median()].groupby('q')[['err','pred',target_col]].mean())
			# print(df.groupby('q')[['err','pred',target_col]].mean())
			# df.err.mean()
			# df[df.weight >= 3.5].err.mean()
			df[df.pred < 200].err.mean()
			df['collection'] = collection
			print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2)))
			# salesdf = salesdf.append( df.rename(columns={'collection_rank':'nft_rank'}).merge(s_df[s_df.sim == 0][['collection','token_id','block_timestamp','price']] )[[ 'collection','token_id','block_timestamp','price','pred','mn_20','nft_rank' ]].sort_values('block_timestamp', ascending=0) )
			salesdf = salesdf.append( df.merge(s_df[s_df.sim == 0][['collection','token_id','block_timestamp','price']] )[[ 'collection','token_id','block_timestamp','price','pred','mn_20','nft_rank' ]].sort_values('block_timestamp', ascending=0) )

			############################################################
			#     Create Predictions for Each NFT in The Collection    #
			############################################################
			# test = merge(num_metadata, cat_metadata, ['collection','token_id'])
			# for c in num_features:
			# 	test[c] = test[c].apply(lambda x: just_float(x) )
			# tail = df.sort_values('timestamp').tail(1)
			# test.loc[ test.token_id == '903', 'nft_rank' ] = 18
			# for c in [ 'std_timestamp','mn_20','log_mn_20' ]:
			# 	if c in tail.columns:
			# 		test[c] = tail[c].values[0]
			# test = standardize_df(test, pred_cols, df)

			# test['pred_lin'] = clf_lin.predict(test[lin_std_pred_cols].values)
			# test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l)
			# test['pred_log'] = clf_log.predict(test[log_std_pred_cols].values)
			# test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l

			# test['pred_price'] = test.pred.apply(lambda x: x if x < 400 else (x-400)**0.96 + 400 )
			def f(p):
				c = CUR_FLOOR * 2.5
				if collection == 'Degen Apes':
					return( p if p <= c else c+((p-c) ** 0.94) )
				return( p if p <= c else c+((p-c) ** 0.95) )
			test['pred_price'] = test.pred.apply(lambda x: f(x) )
			len(test[test.pred <= CUR_FLOOR * 1.01])
			len(test[test.pred <= CUR_FLOOR * 1.02])
			if not check_exclude:
				test['pred_price'] = test.pred_price.apply(lambda x: (x*0.985) )
			if collection == 'BAYC':
				test['pred_price'] = test.pred_price.apply(lambda x: (x*1.03) )
			if collection == 'Galactic Angels':
				test['pred_price'] = test.pred_price.apply(lambda x: (x** 1.05) * 1.2 )

			test['token_id'] = test.token_id.astype(int)
			listings['token_id'] = listings.token_id.astype(int)
			tmp = listings[listings.collection == collection][['token_id','price']].merge(test[['token_id','pred_price']])
			tmp['ratio'] = (tmp.pred_price / tmp.price)
			tmp['is_deal'] = (tmp.ratio > 1).astype(int)
			mx = tmp.ratio.max()
			if mx < 1.15:
				test['pred_price'] = test.pred_price * 1.15 / mx

			# make sure the lowest pred price is the floor
			dff = test.pred_price.min() - CUR_FLOOR
			if dff > 0:
				test['pred_price'] = test.pred_price - dff

			test['pred_sd'] = test.pred_price * pe_sd
			test = test.sort_values(['collection','token_id'])
			test['rk'] = test.pred_price.rank(ascending=0, method='first')
			test['collection'] = collection
			if 'collection_rank' in test.columns and (not 'nft_rank' in test.columns or len(test[test.nft_rank.notnull()]) < len(test[test.collection_rank.notnull()])):
				test['nft_rank'] = test.collection_rank
			pred_price = pred_price.append( test[[ 'collection','token_id','nft_rank','rk','pred_price','pred_sd' ]].sort_values('pred_price') ).drop_duplicates(subset=['collection','token_id'], keep='last')


			imp = []
			# a = [ 'matching' ] if collection == 'Solana Monkey Business' else []
			for c in list(dummies.columns):
				md = test[test[c] == 1].pred_price.median()
				md_0 = test.pred_price.quantile(0.475)
				imp += [[ collection, c, md_0, md ]]
			# imp = pd.DataFrame(imp, columns=['collection','feature_name',''])
			imp = pd.DataFrame(imp, columns=['collection','col','col_md','md']).sort_values('md', ascending=0)
			imp['pct_vs_baseline'] = ((imp.md / imp.col_md) - 1).apply(lambda x: max(0, x))
			imp['feature_name'] = imp.col.apply(lambda x: re.split('_', x)[0].title() )
			imp['feature_value'] = imp.col.apply(lambda x: re.split('_', x)[1] if '_' in x else None )
			sorted(imp.feature_name.unique())
			imp.loc[imp.col == 'Matching_No', 'pct_vs_baseline'] = 0
			imp[imp.feature_name == 'Attribute Count']
			# if 'matching' in a:
			# 	imp.loc[imp.feature_name == 'Matching', 'feature_value'] = 'Yes'
			# 	test[test.matching==1].to_csv('~/Downloads/tmp1.csv', index=False)
			feature_values = feature_values.append(imp[['collection','feature_name','feature_value','pct_vs_baseline']])

			cols = metadata.feature_name.unique()
			cols = [ x for x in cols if not x in (ATT_EXCLUDE_COLS[collection] if collection in ATT_EXCLUDE_COLS.keys() else []) + ALL_NUMERIC_COLS ]
			exclude = RARITY_EXCLUDE_COLS[collection] if collection in RARITY_EXCLUDE_COLS.keys() else []
			for c in cols:
				cur = metadata[metadata.feature_name == c][['collection','token_id','feature_name','feature_value']]
				l = len(cur.token_id.unique())
				if c in exclude:
					cur['rarity'] = None
				else:
					g = cur.groupby('feature_value').token_id.count().reset_index()
					g['rarity'] = g.token_id / l
					cur = merge(cur, g[['feature_value','rarity']])
				attributes = attributes.append(cur)
			# except:
			# 	print('Error')

		attributes['feature_name'] = attributes.feature_name.apply(lambda x: re.sub('_', ' ', x).title().strip() )
		attributes['feature_value'] = attributes.feature_value.apply(lambda x: str(x).strip() )
		sorted(attributes['feature_name'].unique())
		if len(feature_values):
			feature_values['feature_name'] = feature_values.feature_name.apply(lambda x: re.sub('_', ' ', x).title() )
		# feature_values = pd.read_csv('./data/feature_values.csv')
		feature_values = feature_values.merge(attributes[['collection','feature_name']].drop_duplicates())
		# n = feature_values[['collection', 'feature_name']].drop_duplicates().groupby(['collection']).feature_name.count().reset_index().rename(columns={'feature_name': 'n'})
		# feature_values = feature_values.merge(n)
		# feature_values['pct_vs_baseline'] = feature_values.pct_vs_baseline / feature_values.n
		# del feature_values['n']
		feature_values[ (feature_values.collection == 'Galactic Angels') ]
		feature_values[ (feature_values.collection == 'Solana Monkey Business') &  (feature_values.feature_name == 'Clothes')  ]
		feature_values[ (feature_values.collection == 'Solana Monkey Business') & (feature_values.feature_name == 'Clothes') & (feature_values.feature_value == 'Poncho') ]
		feature_values[ (feature_values.collection == 'Okay Bears') & (feature_values.feature_name == 'Attribute Count')]
		attributes[ (attributes.collection == 'Solana Monkey Business') & (attributes.feature_name == 'Clothes') & (attributes.feature_value == 'Poncho') & (attributes.token_id == '1') ]
		attributes[ (attributes.collection == 'Solana Monkey Business') & (attributes.feature_name == 'Clothes') & (attributes.feature_value == 'Poncho') & (attributes.token_id == 1) ]

		coefsdf.to_csv('./data/coefsdf.csv', index=False)
		salesdf.to_csv('./data/model_sales.csv', index=False)
		# salesdf[salesdf.collection]
		salesdf['block_timestamp'] = salesdf.block_timestamp.apply(lambda x: str(x)[:19] )
		salesdf[salesdf.collection == 'BAYC'].sort_values('block_timestamp', ascending=0).head()[['token_id','block_timestamp','price']]
		salesdf[salesdf.block_timestamp.isnull()]
		salesdf.block_timestamp.max()
		salesdf.groupby('collection').block_timestamp.max()
		pred_price[pred_price.collection == 'SOLGods'].to_csv('~/Downloads/tmp1.csv', index=False)
		# old = pd.read_csv('./data/pred_price.csv')
		# old = old[old.collection == 'DeGods']
		# old['token_id'] = old.token_id.astype(str)
		# old = pred_price.merge(old, on=['collection','token_id'])
		# old['ratio'] = old.pred_price_x / old.pred_price_y
		# old = old.sort_values('ratio')
		# old.columns = [ 'collection', 'token_id', 'nft_rank', 'rk_new', 'pred_price_new', 'pred_sd_x', 'rank', 'rk_old', 'pred_price_old', 'pred_sd_y', 'ratio' ]
		# # old.columns = [ 'collection', 'token_id', 'nft_rank', 'rk_new', 'pred_price_new', 'pred_sd_x', 'rank', 'rk_old', 'pred_price_old', 'pred_sd_y', 'clean_token_id', 'ratio' ]
		# m = m_df[(m_df.collection.isin(pred_price.collection.unique())) & (-(m_df.feature_name.isin(['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'])))]
		# m_p = m.pivot(['collection','token_id'], ['feature_name'], ['feature_value']).reset_index()
		# m_p.columns = [ 'collection','token_id' ] + sorted(m.feature_name.unique())
		# m_p.head()
		# old = old.merge(m_p, on=['collection','token_id'])
		# if len(old) and 'rank' in old.columns:
		# 	# old = old[[ 'token_id', 'nft_rank', 'rk_old', 'rk_new', 'pred_price_old', 'pred_price_new', 'ratio' ] + [c for c in m_p.columns if not c in ['token_id','collection']]]
		# 	old = old[[ 'token_id', 'nft_rank', 'rk_old', 'rk_new', 'pred_price_old', 'pred_price_new', 'ratio' ] + [c for c in m_p.columns if not c in ['token_id','collection','rank']]]
		# 	old.to_csv('~/Downloads/tmp1.csv', index=False)
		# 	pred_price.head()
		# 	old[old.token_id == '4857']
		# 	old.head()
		# 	old.tail()

		# nft_rank = m_df[m_df.feature_name=='nft_rank'][['collection','token_id','feature_value']].rename(columns={'feature_value': 'nft_rank'})
		# nft_rank['token_id'] = nft_rank.token_id.astype(str)
		# pred_price['token_id'] = pred_price.token_id.astype(str)
		# pred_price = pred_price.merge(nft_rank, how='left', on=['collection','token_id'])
		# pred_price = pred_price[pred_price.collection != 'LunaBulls']
		pred_price['collection'] = pred_price.collection.apply(lambda x: clean_name(x))
		pred_price = pred_price.drop_duplicates(subset=['collection','token_id'], keep='last')
		pred_price.to_csv('./data/pred_price.csv', index=False)
		# pred_price = pd.read_csv('./data/pred_price.csv')
		pred_price.groupby('collection')[['pred_price']].min()
		attributes.to_csv('./data/attributes.csv', index=False)
		attributes = pd.read_csv('./data/attributes.csv')
		attributes[attributes.rarity.isnull()]
		feature_values.to_csv('./data/feature_values.csv', index=False)
		feature_values[feature_values.collection == 'Galactic Angels'].pct_vs_baseline.unique()
		feature_values[ (feature_values.collection == 'Galactic Angels') & (feature_values.feature_name == 'Background')].feature_value.unique()
		attributes[attributes.collection == 'Galactic Angels'].head()

		# metadata = pd.read_csv('./data/metadata.csv')
		# metadata['collection'] = metadata.collection.apply(lambda x: clean_name(x))
		# metadata['token_id'] = metadata.token_id.astype(str)
		# metadata.head()
		# nft_rank = pred_price[[ 'collection','token_id','nft_rank' ]].rename(columns={'nft_rank':'feature_value'})
		# nft_rank['feature_name'] = 'nft_rank'
		# metadata = metadata[metadata.feature_name != 'nft_rank']
		# nft_rank = merge(nft_rank, metadata[['collection','chain']].fillna('Solana').drop_duplicates())
		# metadata = metadata.append(nft_rank)
		# metadata.to_csv('./data/metadata.csv', index=False)


		# feature_values.to_csv('./data/feature_values.csv', index=False)

		file_to_store = open('./objects/saved_params.pickle', 'wb')
		pickle.dump(saved_params, file_to_store)
		# saved_params.keys()

		if True or check_exclude:
			exclude = pd.read_csv('./data/exclude.csv')
			salesdf['rat'] = salesdf.price / salesdf.pred
			salesdf['dff'] = salesdf.price - salesdf.pred
			salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2.5)) | ((salesdf.dff >= 80) & (salesdf.rat > 2.5))).astype(int)
			salesdf['rat'] = salesdf.pred / salesdf.price
			salesdf['dff'] = salesdf.pred - salesdf.price
			salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2.5)) | ((salesdf.dff >= 80) & (salesdf.rat > 2.5))).astype(int)
			salesdf['exclude'] = (salesdf.exclude_1 + salesdf.exclude_2).apply(lambda x: int(x>0))
			# print(salesdf.exclude_1.mean())
			# print(salesdf.exclude_2.mean())
			# print(salesdf.exclude.mean())
			salesdf[salesdf.token_id == '2239'][['collection','price','exclude']]
			exclude = exclude.append(salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]])
			# salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False)
			exclude.to_csv('./data/exclude.csv', index=False)
		# tokens[tokens.collection == 'Meerkat Millionaires']
		# tokens[tokens.collection == 'Cets on Creck'].sort_values('nft_rank', ascending=0)
		# tokens[tokens.collection == 'Cets on Creck']
		# tokens = tokens.drop_duplicates(subset=['collection','token_id'], keep='last')
		# tokens['chain'] = tokens.chain.fillna('Solana')
		# tokens['clean_token_id'] = tokens.clean_token_id.fillna(tokens.token_id)
		# tokens.to_csv('./data/tokens.csv', index=False)

# train_model(True, False)
# train_model(False, False)
# train_model(False, True)

# train_model()