Mirror of https://github.com/FlipsideCrypto/nft-deal-score.git (synced 2026-02-06 10:56:58 +00:00)

Commit 9dd1d71538 ("levana"), parent e14a6b539b

load_data.py — 36 changed lines
@@ -55,8 +55,10 @@ def run_queries():
metadata = ctx.cursor().execute(' '.join(query))
metadata = pd.DataFrame.from_records(iter(metadata), columns=[x[0] for x in metadata.description])
metadata = clean_colnames(metadata)
metadata['image'] = metadata.image.apply(lambda x: 'https://cloudflare-ipfs.com/ipfs/'+re.split('/', x)[-1] )
metadata['collection'] = c
metadata['chain'] = 'Terra'
list(metadata.image.values[:2]) + list(metadata.image.values[-2:])
metadata.to_csv('./data/metadata/{}.csv'.format(c), index=False)
# old = pd.read_csv('./data/metadata.csv')
# old = old[-old.collection.isin(metadata.collection.unique())]
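A minimal sketch of the Snowflake-to-pandas pattern used in this hunk, assuming `ctx` is an open snowflake.connector connection; the query text and the follow-up image rewrite are placeholders, not the exact query from the repo:

import re
import pandas as pd
import snowflake.connector

def fetch_df(ctx, query):
    # Execute the query and build a DataFrame from the cursor:
    # cursor.description holds the column names, iter(cursor) yields the rows.
    cur = ctx.cursor().execute(query)
    return pd.DataFrame.from_records(iter(cur), columns=[col[0] for col in cur.description])

# Hypothetical usage:
# metadata = fetch_df(ctx, 'SELECT * FROM terra.msgs LIMIT 10')
# metadata['image'] = metadata.image.apply(lambda x: 'https://cloudflare-ipfs.com/ipfs/' + re.split('/', x)[-1])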
@@ -75,7 +77,7 @@ def add_terra_tokens():
, msg_value:execute_msg:mint_nft:extension:name AS name
, msg_value:execute_msg:mint_nft:extension:image AS image
FROM terra.msgs
WHERE msg_value:contract::string = 'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2'
WHERE msg_value:contract::string = 'terra16wuzgsx3tz4hkqu73q5s7unxenefkkvefvewsh'
AND tx_status = 'SUCCEEDED'
AND msg_value:execute_msg:mint_nft is not null
'''
@@ -164,6 +166,9 @@ def add_terra_metadata():
metadata['attribute_count'] = 0
l = len(metadata)
incl_att_count = not collection in [ 'Levana Dragon Eggs' ]
metadata.groupby('cracking_date').token_id.count()
metadata.groupby('weight').token_id.count()
metadata[metadata.cracking_date=='2471-12-22'][['token_id']]
for c in list(metadata.columns) + ['attribute_count']:
if c in ['token_id','collection','pct','levana_rank','meteor_id']:
continue
@@ -186,11 +191,18 @@ def add_terra_metadata():
# metadata.sort_values('pct_rank')
metadata.sort_values('pct')
metadata['rank'] = metadata.pct.rank()
metadata['score'] = metadata.pct.apply(lambda x: 1.0 / x )
mn = metadata.score.min()
metadata['score'] = metadata.score.apply(lambda x: x / mn )
metadata.score.max()
metadata.sort_values('rank')[['rank','pct','score']]
metadata['rarity_score'] = metadata.pct.apply(lambda x: 1.0 / (x**0.2) )
mn = metadata.rarity_score.min()
mx = metadata.rarity_score.max()
metadata['rarity_score'] = metadata.rarity_score.apply(lambda x: round(((x - mn) * 999 / (mx - mn)) + 1) )
metadata.sort_values('rarity_score', ascending=0).head(20)[['token_id','collection_rank','rarity_score']]
metadata.sort_values('rarity_score', ascending=0).tail(20)[['token_id','collection_rank','rarity_score']]
metadata[metadata.token_id==6157].sort_values('rarity_score', ascending=0).tail(20)[['token_id','collection_rank','rarity_score','rank']]
metadata[metadata['rank']>=3000].groupby('weight').token_id.count()

metadata.rarity_score.max()
metadata.rarity_score.min()
metadata.sort_values('rank')[['rank','pct','rarity_score']]

m = pd.DataFrame()
for c in metadata.columns:
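The rarity_score above compresses each token's joint trait probability pct with a fifth root and then min-max scales it onto a 1–1000 integer range. A small sketch of that transform on a toy frame (column names follow the diff; the numbers are made up):

import pandas as pd

metadata = pd.DataFrame({'token_id': [1, 2, 3],
                         'pct': [0.00001, 0.0005, 0.02]})  # joint rarity of each token's traits

# raw score: rarer tokens (smaller pct) get larger values; **0.2 dampens the spread
metadata['rarity_score'] = metadata.pct.apply(lambda x: 1.0 / (x ** 0.2))

# rescale to an integer score between 1 (most common) and 1000 (rarest)
mn = metadata.rarity_score.min()
mx = metadata.rarity_score.max()
metadata['rarity_score'] = metadata.rarity_score.apply(lambda x: round(((x - mn) * 999 / (mx - mn)) + 1))
print(metadata)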
@@ -201,16 +213,20 @@ def add_terra_metadata():
m = m.append(cur)
m['chain'] = 'Terra'
m.groupby('feature_name').feature_value.count()
m[m.feature_name=='face'].groupby('feature_value').token_id.count()
print(len(m.token_id.unique()))
if collection == 'Levana Dragon Eggs':
add = m[m.feature_name=='collection_rank']
add['feature_name'] = 'transformed_collection_rank'
add['feature_value'] = add.feature_value.apply(lambda x: (1.0/ (x + 0.5))**1 )
m = m.append(add)
g = m.groupby('feature_value').feature_name.count().reset_index().sort_values('feature_name').tail(50)
old = pd.read_csv('./data/metadata.csv')
if not 'chain' in old.columns:
old['chain'] = old.collection.apply(lambda x: 'Terra' if x in [ 'Galactic Punks', 'LunaBulls' ] else 'Solana' )
old = old[-old.collection.isin(m.collection.unique())]
old = old.append(m)
old = old.drop_duplicates()
print(old.groupby(['chain','collection']).token_id.count())
old = old.drop_duplicates(subset=['collection','token_id','feature_name'])
old = old[-(old.feature_name.isin(['last_sale']))]
# print(old.groupby(['chain','collection']).token_id.count())
print(old[['chain','collection','token_id']].drop_duplicates().groupby(['chain','collection']).token_id.count())
old.to_csv('./data/metadata.csv', index=False)

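add_terra_metadata finishes by stacking every trait column into the long (collection, token_id, feature_name, feature_value) layout that data/metadata.csv uses, and for Levana Dragon Eggs it also derives transformed_collection_rank = 1/(rank + 0.5) as an extra numeric feature. A compact sketch of that reshape on toy data, using pandas melt instead of the column-by-column append loop from the script:

import pandas as pd

wide = pd.DataFrame({'collection': ['Levana Dragon Eggs'] * 2,
                     'token_id': [1, 2],
                     'collection_rank': [10, 250],
                     'essence': ['Frost', 'Storm']})

# wide trait columns -> one row per (token, feature)
m = wide.melt(id_vars=['collection', 'token_id'],
              var_name='feature_name', value_name='feature_value')

# derived numeric feature: smaller ranks map to larger values
add = m[m.feature_name == 'collection_rank'].copy()
add['feature_name'] = 'transformed_collection_rank'
add['feature_value'] = add.feature_value.apply(lambda x: 1.0 / (x + 0.5))
m = pd.concat([m, add], ignore_index=True)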
@@ -4,8 +4,8 @@ WITH legendary_traits AS (
block_id,
tx_id,
msg_value:execute_msg:mint:extension:name::string as name,
CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image,
msg_value:execute_msg:mint:token_id::string as tokenid,
msg_value:execute_msg:mint:extension:image::string as image,
msg_value:execute_msg:mint:token_id::string as tokenid,
msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity,
msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank,
msg_value:execute_msg:mint:extension:attributes[2]:value::string as origin,
@@ -39,7 +39,7 @@ WITH legendary_traits AS (
block_id,
tx_id,
msg_value:execute_msg:mint:extension:name::string as name,
CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image,
msg_value:execute_msg:mint:extension:image::string as image,
msg_value:execute_msg:mint:token_id::string as tokenid,
msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity,
msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank,
@@ -74,7 +74,7 @@ WITH legendary_traits AS (
block_id,
tx_id,
msg_value:execute_msg:mint:extension:name::string as name,
CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image,
msg_value:execute_msg:mint:extension:image::string as image,
msg_value:execute_msg:mint:token_id::string as tokenid,
msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity,
msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank,
@@ -109,7 +109,7 @@ WITH legendary_traits AS (
block_id,
tx_id,
msg_value:execute_msg:mint:extension:name::string as name,
CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image,
msg_value:execute_msg:mint:extension:image::string as image,
msg_value:execute_msg:mint:token_id::string as tokenid,
msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity,
msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank,

@@ -5,7 +5,7 @@ select block_timestamp,
block_id,
tx_id,
msg_value:execute_msg:mint:extension:name::string as name,
CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image,
msg_value:execute_msg:mint:extension:image::string as image,
msg_value:execute_msg:mint:token_id::string as token_id,
msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity,
msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank,
@@ -38,7 +38,7 @@ select block_timestamp,
block_id,
tx_id,
msg_value:execute_msg:mint:extension:name::string as name,
CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image,
msg_value:execute_msg:mint:extension:image::string as image,
msg_value:execute_msg:mint:token_id::string as token_id,
msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity,
msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank,
@@ -74,7 +74,7 @@ select block_timestamp,
block_id,
tx_id,
msg_value:execute_msg:mint:extension:name::string as name,
CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image,
msg_value:execute_msg:mint:extension:image::string as image,
msg_value:execute_msg:mint:token_id::string as token_id,
msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity,
msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank,
@@ -109,7 +109,7 @@ select block_timestamp,
block_id,
tx_id,
msg_value:execute_msg:mint:extension:name::string as name,
CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image,
msg_value:execute_msg:mint:extension:image::string as image,
msg_value:execute_msg:mint:token_id::string as token_id,
msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity,
msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank,
@@ -144,7 +144,7 @@ select block_timestamp,
block_id,
tx_id,
msg_value:execute_msg:mint:extension:name::string as name,
CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image,
msg_value:execute_msg:mint:extension:image::string as image,
msg_value:execute_msg:mint:token_id::string as token_id,
msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity,
msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank,
@@ -179,7 +179,7 @@ select block_timestamp,
block_id,
tx_id,
msg_value:execute_msg:mint:extension:name::string as name,
CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image,
msg_value:execute_msg:mint:extension:image::string as image,
msg_value:execute_msg:mint:token_id::string as token_id,
msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity,
msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank,

@@ -4,7 +4,7 @@ select block_timestamp,
block_id,
tx_id,
msg_value:execute_msg:mint:extension:name::string as name,
CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image,
msg_value:execute_msg:mint:extension:image::string as image,
msg_value:execute_msg:mint:token_id::string as token_id,
msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity,
msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank,

@@ -39,16 +39,18 @@ def clean_name(name):
def scrape_randomearth():
d_address = {
'Galactic Punks': 'terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k',
'LunaBulls': 'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2'
'LunaBulls': 'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2',
'Levana Dragon Eggs': 'terra1k0y373yxqne22pc9g7jvnr4qclpsxtafevtrpg',
}
data = []
for collection in [ 'Galactic Punks', 'LunaBulls' ]:
# for collection in [ 'Levana Dragon Eggs' ]:
for collection in d_address.keys():
print(collection)
page = 0
has_more = True
while has_more:
page += 1
print('Page #{}'.format(page))
print('Page #{} ({})'.format(page, len(data)))
url = 'https://randomearth.io/api/items?collection_addr={}&sort=price.asc&page={}&on_sale=1'.format( d_address[collection], page)
browser.get(url)
soup = BeautifulSoup(browser.page_source)
@@ -59,6 +61,7 @@ def scrape_randomearth():
for i in j['items']:
data += [[ 'Terra', collection, i['token_id'], i['price'] / (10 ** 6) ]]
df = pd.DataFrame(data, columns=['chain','collection','token_id','price'])
df.to_csv('~/Downloads/tmp.csv', index=False)
old = pd.read_csv('./data/listings.csv')
old = old[-old.collection.isin(df.collection.unique())]
old = old.append(df)
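scrape_randomearth pages through the RandomEarth items API one collection address at a time until a page comes back empty, then folds the scraped prices into data/listings.csv. A simplified sketch of that pagination loop using requests directly instead of the Selenium/BeautifulSoup browser session the script relies on; the endpoint and field names are taken from the diff and should be treated as assumptions:

import requests
import pandas as pd

def scrape_collection(collection, addr):
    data, page, has_more = [], 0, True
    while has_more:
        page += 1
        url = ('https://randomearth.io/api/items'
               '?collection_addr={}&sort=price.asc&page={}&on_sale=1').format(addr, page)
        j = requests.get(url, timeout=30).json()
        items = j.get('items', [])
        has_more = len(items) > 0
        for i in items:
            # prices come back in micro-units, so divide by 10**6
            data.append(['Terra', collection, i['token_id'], i['price'] / (10 ** 6)])
    return pd.DataFrame(data, columns=['chain', 'collection', 'token_id', 'price'])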
@@ -189,9 +192,12 @@ def convert_collection_names():
,'boryokudragonz': 'Boryoku Dragonz'
}
for c in [ 'pred_price', 'attributes', 'feature_values', 'model_sales', 'listings', 'coefsdf', 'tokens' ]:
df = pd.read_csv('./data/{}.csv'.format(c))
df['collection'] = df.collection.apply(lambda x: clean_name(x) if x in d.keys() else x )
df.to_csv('./data/{}.csv'.format(c), index=False)
try:
df = pd.read_csv('./data/{}.csv'.format(c))
df['collection'] = df.collection.apply(lambda x: clean_name(x) if x in d.keys() else x )
df.to_csv('./data/{}.csv'.format(c), index=False)
except:
pass

def scrape_recent_sales():
o_sales = pd.read_csv('./data/sales.csv')
@@ -234,6 +240,7 @@ def scrape_listings(collections = [ 'aurory','thugbirdz','smb','degenapes','pesk
, 'degenapes': 'degen-ape-academy'
, 'peskypenguinclub': 'pesky-penguins'
}
collection = 'smb'
for collection in collections:
if collection == 'boryokudragonz':
continue
@@ -249,7 +256,7 @@ def scrape_listings(collections = [ 'aurory','thugbirdz','smb','degenapes','pesk
print('{} page #{} ({})'.format(collection, page, len(data)))
sleep(3)
page += 1
for j in [25, 30, 35, 30, 25] * 2:
for j in [20, 30, 30, 30, 30, 30, 30, 30] * 1:
for _ in range(1):
soup = BeautifulSoup(browser.page_source)
# for row in browser.find_elements_by_class_name('ag-row'):
@@ -325,6 +332,7 @@ def scrape_listings(collections = [ 'aurory','thugbirdz','smb','degenapes','pesk

pred_price = pd.read_csv('./data/pred_price.csv')[['collection','token_id','pred_price','pred_sd']]
pred_price['collection'] = pred_price.collection.apply(lambda x: clean_name(x))
pred_price['token_id'] = pred_price.token_id.astype(str)
pred_price = pred_price.merge(listings)

coefsdf = pd.read_csv('./data/coefsdf.csv')
@@ -338,7 +346,10 @@ def scrape_listings(collections = [ 'aurory','thugbirdz','smb','degenapes','pesk

metadata = pd.read_csv('./data/metadata.csv')
solana_blob = metadata[ (metadata.collection == 'aurory') & (metadata.feature_name == 'skin') & (metadata.feature_value == 'Solana Blob (9.72%)')].token_id.unique()
pred_price['pred_price'] = pred_price.apply(lambda x: (x['pred_price'] * 0.8) - 8 if x['token_id'] in solana_blob and x['collection'] == 'Aurory' else x['pred_price'], 1 )
pred_price['pred_price'] = pred_price.apply(lambda x: (x['pred_price'] * 0.8) - 4 if x['token_id'] in solana_blob and x['collection'] == 'Aurory' else x['pred_price'], 1 )

solana_blob = metadata[ (metadata.collection == 'aurory') & (metadata.feature_name == 'hair') & (metadata.feature_value == 'Long Blob Hair (9.72%)')].token_id.unique()
pred_price['pred_price'] = pred_price.apply(lambda x: (x['pred_price'] * 0.8) - 2 if x['token_id'] in solana_blob and x['collection'] == 'Aurory' else x['pred_price'], 1 )

pred_price['abs_chg'] = (pred_price.floor - pred_price.floor_price) * pred_price.lin_coef
pred_price['pct_chg'] = (pred_price.floor - pred_price.floor_price) * pred_price.log_coef
@@ -711,6 +722,7 @@ def scratch():
# print('Sleeping until {}'.format(sleep_to))
# sleep(60 * 15)
alerted = []
scrape_randomearth()
alerted = scrape_listings(alerted = alerted)
# scrape_randomearth()
# alerted = scrape_listings(['smb'],alerted = alerted)
convert_collection_names()
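scrape_listings joins live listings to the model's pred_price and then shifts each prediction by how far the current floor has moved since the model was fit: abs_chg and pct_chg apply the stored lin_coef/log_coef to (current floor - floor at training time). A hedged sketch of that adjustment with invented data; the final adj_pred line is my assumption about how the shift is applied downstream, not something shown in the diff:

import pandas as pd

pred_price = pd.DataFrame({'collection': ['Aurory', 'Aurory'],
                           'token_id': ['1', '2'],
                           'pred_price': [25.0, 40.0],
                           'lin_coef': [0.6, 0.6],       # per-collection blend weights saved in coefsdf.csv
                           'log_coef': [0.4, 0.4],
                           'floor_price': [20.0, 20.0],  # floor when the model was trained
                           'floor': [24.0, 24.0]})       # floor right now

# shift predictions by how much the floor has moved since training
pred_price['abs_chg'] = (pred_price.floor - pred_price.floor_price) * pred_price.lin_coef
pred_price['pct_chg'] = (pred_price.floor - pred_price.floor_price) * pred_price.log_coef
pred_price['adj_pred'] = pred_price.pred_price + pred_price.abs_chg  # assumption: how the shift feeds the deal score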
solana_model.py — 528 changed lines
@@ -10,7 +10,7 @@ import tensorflow as tf
import snowflake.connector
from datetime import datetime
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, RidgeCV
from sklearn.linear_model import LinearRegression, RidgeCV, Lasso
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV

warnings.filterwarnings('ignore')
@@ -18,26 +18,45 @@ warnings.filterwarnings('ignore')
os.chdir('/Users/kellenblumberg/git/nft-deal-score')

CHECK_EXCLUDE = False
CHECK_EXCLUDE = True
# CHECK_EXCLUDE = True

# Using sales from howrare.is - the last sale that was under 300 was when the floor was at 72. Filtering for when the floor is >100, the lowest sale was 400

###################################
# Define Helper Functions #
###################################
def standardize_df(df, cols, usedf=None):
def standardize_df(df, cols, usedf=None, verbose=False):
for c in cols:
if type(usedf) != type(pd.DataFrame()):
usedf = df
mu = usedf[c].mean()
sd = usedf[c].std()
# print(c)
if verbose:
print(c)
if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0:
df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 )
else:
df['std_{}'.format(c)] = (df[c] - mu) / sd
return(df)

def merge(left, right, on=None, how='inner', ensure=True, verbose=True):
df = left.merge(right, on=on, how=how)
if len(df) != len(left) and (ensure or verbose):
print('{} -> {}'.format(len(left), len(df)))
cur = left.merge(right, on=on, how='left')
cols = set(right.columns).difference(set(left.columns))
print(cols)
col = list(cols)[0]
missing = cur[cur[col].isnull()]
print(missing.head())
if ensure:
assert(False)
return(df)

def just_float(x):
x = re.sub('[^\d\.]', '', str(x))
return(float(x))

def calculate_percentages(df, cols=[]):
add_pct = not 'pct' in df.columns
if not len(cols):
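Two of the helpers added in this hunk do the heavy lifting later in the script: standardize_df() z-scores continuous columns and maps 0/1 dummies to -1/+1, while merge() is an inner join that loudly reports, and optionally asserts on, dropped rows. A small usage sketch of those two functions on toy frames:

import pandas as pd

left = pd.DataFrame({'token_id': ['1', '2', '3'], 'price': [10.0, 12.0, 30.0]})
right = pd.DataFrame({'token_id': ['1', '2'], 'rank': [5, 900]})

# merge() behaves like an inner join but prints the row-count change and the
# rows that failed to match; with ensure=True it raises instead of silently dropping.
df = merge(left, right, on=['token_id'], ensure=False)

# standardize_df() adds std_<col> columns: z-scores for continuous features,
# -1/+1 for columns that only take the values 0 and 1.
df = standardize_df(df, ['price', 'rank'])
print(df.filter(like='std_'))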
@@ -64,13 +83,9 @@ exclude = [
# ( 'aurory', 3323, 138 )
]
s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'})
s_df[ s_df.collection == 'Levana Dragons' ].sort_values('block_timestamp', ascending=0).head()
print(len(s_df[s_df.collection == 'Levana Dragon Eggs']))
print(s_df.groupby('collection').token_id.count())
s_df.collection.unique()
s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])]
s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]]
s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ]
s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]]
for e in exclude:
s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )]
s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ]
@ -82,241 +97,183 @@ if not CHECK_EXCLUDE:
|
||||
s_df = s_df[s_df.exclude.isnull()]
|
||||
del s_df['exclude']
|
||||
|
||||
#########################
|
||||
# Load Metadata #
|
||||
#########################
|
||||
m_df = pd.read_csv('./data/metadata.csv')
|
||||
m_df['token_id'] = m_df.token_id.astype(str)
|
||||
tmp = m_df[m_df.collection.isin(['Levana Dragon Eggs','Levana Meteors','Levana Dust'])]
|
||||
tmp['tmp'] = tmp.token_id.astype(int)
|
||||
tmp.groupby('collection').tmp.max()
|
||||
m_df.head()
|
||||
# s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(x[:10], '%Y-%m-%d %H:%M:%S') )
|
||||
# remove ones that are not actually metadata
|
||||
m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ]
|
||||
m_df['feature_value'] = m_df.feature_value.apply(lambda x: re.split("\(", re.sub("\"", "", x))[0] if type(x)==str else x )
|
||||
m_df[(m_df.feature_name=='rank') & (m_df.collection == 'Levana Dragon Eggs')]
|
||||
|
||||
|
||||
#####################################
|
||||
# Exclude Special LunaBulls #
|
||||
#####################################
|
||||
tokens = pd.read_csv('./data/tokens.csv')
|
||||
tokens.token_id.unique()
|
||||
lunabullsrem = tokens[tokens.clean_token_id>=10000].token_id.unique()
|
||||
m_df = m_df[ -((m_df.collection == 'LunaBulls') & (m_df.token_id.isin(lunabullsrem))) ]
|
||||
s_df = s_df[ -((s_df.collection == 'LunaBulls') & (s_df.token_id.isin(lunabullsrem))) ]
|
||||
|
||||
|
||||
###########################
|
||||
# Calculate Floor #
|
||||
###########################
|
||||
s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') )
|
||||
s_df['timestamp'] = s_df.block_timestamp.astype(int)
|
||||
# del metadata['price']
|
||||
# del metadata['last_sale']
|
||||
s_df = s_df.sort_values(['collection','block_timestamp'])
|
||||
s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
|
||||
s_df = s_df.sort_values(['collection','block_timestamp'])
|
||||
s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int)
|
||||
s_df[[ 'block_timestamp','days_ago' ]].drop_duplicates(subset=['days_ago'])
|
||||
|
||||
s_df['av_20'] = s_df.groupby('collection')['mn_20'].rolling(20).mean().reset_index(0,drop=True)
|
||||
s_df = s_df.sort_values(['collection','block_timestamp'])
|
||||
# s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).median().reset_index(0,drop=True)
|
||||
s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True)
|
||||
# s_df[ (-((s_df.price) >= (s_df.md_20 * 0.2))) & (s_df.price.notnull()) & (s_df.collection == 'Levana Dragon Eggs') ]
|
||||
|
||||
s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.75) ]
|
||||
# lowest price in last 20 sales
|
||||
s_df = s_df.sort_values(['collection','block_timestamp'])
|
||||
s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
|
||||
s_df = s_df.sort_values(['collection','block_timestamp'])
|
||||
s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True)
|
||||
|
||||
# exclude sales that are far below the existing floor
|
||||
s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ]
|
||||
|
||||
# 10%ile of last 20 sales
|
||||
s_df = s_df.sort_values(['collection','block_timestamp'])
|
||||
s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
|
||||
s_df = s_df.sort_values(['collection','block_timestamp'])
|
||||
# s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).min().reset_index(0,drop=True)
|
||||
s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True)
|
||||
s_df.sort_values(['collection','block_timestamp'])[['price','mn_20','block_timestamp']].head(21).tail(40)
|
||||
s_df.sort_values(['collection','block_timestamp'])[['price','mn_20','block_timestamp']].head(20).sort_values('price')
|
||||
s_df['tmp'] = s_df.mn_20 / s_df.md_20
|
||||
|
||||
tmp = s_df[s_df.collection=='smb'][['mn_20','block_timestamp']]
|
||||
tmp['date'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] )
|
||||
tmp = tmp.groupby('date').mn_20.median().reset_index()
|
||||
tmp.to_csv('~/Downloads/tmp.csv', index=False)
|
||||
|
||||
s_df['tmp'] = s_df.price / s_df.mn_20
|
||||
s_df[s_df.collection == 'smb'].sort_values('block_timestamp')[['token_id','price','mn_20']]
|
||||
s_df[s_df.collection == 'smb'].sort_values('tmp').head(20)[['collection','token_id','price','mn_20','tmp']]
|
||||
s_df.groupby('collection').tmp.median()
|
||||
s_df.groupby('collection').tmp.mean()
|
||||
|
||||
s_df.sort_values('tmp').head()
|
||||
s_df['tmp'] = s_df.price / s_df.mn_20
|
||||
s_df[['collection','token_id','block_timestamp','price','mn_20','md_20','av_20','tmp']].to_csv('~/Downloads/tmp.csv', index=False)
|
||||
s_df.groupby('collection').tmp.median()
|
||||
s_df.groupby('collection').tmp.mean()
|
||||
s_df.sort_values('tmp', ascending=0).head()
|
||||
s_df.head(21)
|
||||
m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ]
|
||||
# m_df['feature_value'] = m_df.feature_value.apply(lambda x: x.strip() )
|
||||
# m_df.feature_value.unique()
|
||||
pred_cols = {}
|
||||
metadata = {}
|
||||
sales = {}
|
||||
collection_features = {}
|
||||
m_df[(m_df.collection == 'Galactic Punks') & (m_df.feature_name == 'pct')].sort_values('token_id')
|
||||
c = 'Galactic Punks'
|
||||
EXCLUDE_COLS = {
|
||||
'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
|
||||
}
|
||||
for c in s_df.collection.unique():
|
||||
print('Building {} model'.format(c))
|
||||
sales[c] = s_df[ s_df.collection == c ]
|
||||
exclude = EXCLUDE_COLS[c] if c in EXCLUDE_COLS.keys() else []
|
||||
pred_cols[c] = sorted([x for x in m_df[ m_df.collection == c ].feature_name.unique() if not x in exclude])
|
||||
collection_features[c] = [ c for c in pred_cols[c] if not c in ['score','rank','pct']+exclude ]
|
||||
metadata[c] = m_df[ (m_df.collection == c) & (-(m_df.feature_name.isin(exclude))) ]
|
||||
|
||||
# tmp = pd.pivot_table( metadata[c], ['collection','token_id'], columns=['feature_name'], values=['feature_value'] )
|
||||
metadata[c] = metadata[c].pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
|
||||
metadata[c].columns = [ 'collection','token_id' ] + pred_cols[c]
|
||||
|
||||
features = collection_features[c]
|
||||
cur = metadata[c]
|
||||
cur = cur.dropna(subset=features)
|
||||
for f in features:
|
||||
if type(cur[f].values[0] == str):
|
||||
cur[f] = cur[f].apply(lambda x: re.sub("\"", "", str(x) ) )
|
||||
cur[f] = cur[f].apply(lambda x: re.split("\(", x )[0].strip())
|
||||
cur = cur.replace('', 'Default')
|
||||
# if not 'pct' in cur.columns:
|
||||
cur = calculate_percentages( cur, features )
|
||||
dummies = pd.get_dummies(cur[features])
|
||||
feature_cols = dummies.columns
|
||||
cur = pd.concat([ cur.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
|
||||
metadata[c] = cur
|
||||
# pred_cols[c] = ['rank','score','timestamp','mn_20','log_mn_20'] + list(dummies.columns)
|
||||
cols = [ 'collection_rank' ]
|
||||
cols = [ ]
|
||||
pred_cols[c] = [ 'rank','transform_rank','score'] + [x for x in cols if x in m_df.feature_name.unique()] + list(dummies.columns)
|
||||
|
||||
# collection_features = {
|
||||
# 'Hashmasks': [ 'character','eyecolor','item','mask','skincolor' ]
|
||||
# , 'Galactic Punks': [ 'backgrounds','hair','species','suits','jewelry','headware','glasses' ]
|
||||
# , 'Solana Monkey Business': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
|
||||
# , 'Aurory': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
|
||||
# # , 'Thugbirdz': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
|
||||
# }
|
||||
|
||||
excludedf = pd.DataFrame()
|
||||
###########################
|
||||
# Calculate Floor #
|
||||
###########################
|
||||
coefsdf = pd.DataFrame()
|
||||
salesdf = pd.DataFrame()
|
||||
attributes = pd.DataFrame()
|
||||
pred_price = pd.DataFrame()
|
||||
feature_values = pd.DataFrame()
|
||||
collections = sorted(metadata.keys())
|
||||
collection = 'Galactic Punks'
|
||||
tokens = pd.read_csv('./data/tokens.csv')
|
||||
collection = 'Levana Dragon Eggs'
|
||||
# for collection in s_df.collection.unique():
|
||||
for collection in ['Levana Dragon Eggs']:
|
||||
# collection = 'LunaBulls'
|
||||
# collection = 'smb'
|
||||
# collection = 'aurory'
|
||||
# collection = 'meerkatmillionaires'
|
||||
# non-binary in model: collection_rank, temperature, weight
|
||||
# non-binary in model; exclude from rarity: pct, rank, score
|
||||
# exclude from model: lucky_number, shower
|
||||
# exclude from model and rarity %: meteor_id, attribute_count, cracking_date
|
||||
ALL_NUMERIC_COLS = ['rank','score','pct']
|
||||
MODEL_EXCLUDE_COLS = {
|
||||
# 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
|
||||
'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count']
|
||||
}
|
||||
RARITY_EXCLUDE_COLS = {
|
||||
# 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
|
||||
'Levana Dragon Eggs': ['meteor_id','attribute_count','collection_rank','transformed_collection_rank','rarity_score']
|
||||
}
|
||||
NUMERIC_COLS = {
|
||||
'Levana Dragon Eggs': ['collection_rank','weight','temperature','transformed_collection_rank','rarity_score']
|
||||
}
|
||||
ATT_EXCLUDE_COLS = {
|
||||
'Levana Dragon Eggs': ['attribute_count','transformed_collection_rank']
|
||||
}
|
||||
# for collection in [ 'Levana Dragon Eggs' ]:
|
||||
for collection in s_df.collection.unique():
|
||||
print('Working on collection {}'.format(collection))
|
||||
p_metadata = metadata[collection]
|
||||
if 'attribute_count' in p_metadata.columns:
|
||||
p_metadata['attribute_count'] = p_metadata.attribute_count.astype(float).astype(int)
|
||||
|
||||
p_sales = sales[collection]
|
||||
# specify the predictive features
|
||||
p_pred_cols = pred_cols[collection]
|
||||
p_features = collection_features[collection]
|
||||
p_sales['token_id'] = p_sales.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
|
||||
p_metadata['token_id'] = p_metadata.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
|
||||
for c in [ 'rank','score' ]:
|
||||
p_metadata[c] = p_metadata[c].astype(float)
|
||||
# p_sales['contract_address'] = p_sales.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
|
||||
# p_metadata['contract_address'] = p_metadata.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
|
||||
p_sales['contract_address'] = ''
|
||||
p_metadata['contract_address'] = ''
|
||||
sales = s_df[ s_df.collection == collection ]
|
||||
metadata = m_df[ m_df.collection == collection ]
|
||||
metadata[metadata.token_id == '1']
|
||||
metadata[metadata.feature_name == 'rank']
|
||||
metadata.feature_name.unique()
|
||||
|
||||
# remove 1 columns for each group (since they are colinear)
|
||||
# exclude = []
|
||||
# for f in p_features:
|
||||
# e = [ c for c in p_pred_cols if c[:len(f)] == f ][-1]
|
||||
# exclude.append(e)
|
||||
# categorize columns
|
||||
all_names = sorted(metadata.feature_name.unique())
|
||||
model_exclude = MODEL_EXCLUDE_COLS[collection] if collection in MODEL_EXCLUDE_COLS.keys() else []
|
||||
num_features = sorted((NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []) + ALL_NUMERIC_COLS)
|
||||
num_features = [ x for x in num_features if x in metadata.feature_name.unique() ]
|
||||
num_metadata = metadata[metadata.feature_name.isin(num_features)]
|
||||
num_metadata[num_metadata.feature_name == 'rank']
|
||||
cat_features = sorted([ x for x in all_names if not x in (model_exclude + num_features) ])
|
||||
cat_metadata = metadata[metadata.feature_name.isin(cat_features)]
|
||||
|
||||
df = p_sales.merge(p_metadata, on=['token_id','contract_address'])
|
||||
df = df[df.mn_20.notnull()]
|
||||
# create dummies for binary variables
|
||||
num_metadata = num_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
|
||||
num_metadata.columns = [ 'collection','token_id' ] + num_features
|
||||
|
||||
# create dummies for binary variables
|
||||
cat_metadata = cat_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
|
||||
cat_metadata.columns = [ 'collection','token_id' ] + cat_features
|
||||
cat_metadata = calculate_percentages( cat_metadata, cat_features )
|
||||
dummies = pd.get_dummies(cat_metadata[cat_features])
|
||||
cat_metadata = pd.concat([ cat_metadata.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
|
||||
del cat_metadata['pct']
|
||||
|
||||
pred_cols = num_features + list(dummies.columns)
|
||||
|
||||
# create training df
|
||||
df = merge(sales, num_metadata, ['collection','token_id'], ensure=False)
|
||||
df = merge(df, cat_metadata, ['collection','token_id'])
|
||||
for c in num_features:
|
||||
df[c] = df[c].apply(lambda x: just_float(x))
|
||||
|
||||
# create target cols
|
||||
target_col = 'adj_price'
|
||||
df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 )
|
||||
# df['mn_20'] = df.apply(lambda x: min(x[target_col], x['mn_20']), 1 )
|
||||
# tmp = df[['block_timestamp','mn_20']].copy()
|
||||
# tmp['tmp'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] )
|
||||
# tmp = tmp.groupby('tmp').mn_20.median().reset_index()
|
||||
# tmp.sort_values('tmp').to_csv('~/Downloads/tmp.csv', index=False)
|
||||
# df['timestamp'] = df.block_timestamp.astype(int)
|
||||
df = df[df[target_col].notnull()]
|
||||
df = df.reset_index(drop=True)
|
||||
df['transform_rank'] = df['rank'].apply(lambda x: 1.0 / (x**2) )
|
||||
df['log_price'] = df[target_col].apply(lambda x: np.log(x) )
|
||||
df['rel_price_0'] = df[target_col] - df.mn_20
|
||||
df['rel_price_1'] = df[target_col] / df.mn_20
|
||||
df = df[df.mn_20 > 0]
|
||||
df['log_mn_20'] = np.log(df.mn_20)
|
||||
print('Training on {} sales'.format(len(df)))
|
||||
# df['price_median'] = df.groupby('token_id').price.median()
|
||||
df = standardize_df(df, pred_cols)
|
||||
|
||||
# standardize columns to mean 0 sd 1
|
||||
len(p_pred_cols)
|
||||
df = standardize_df(df, p_pred_cols)
|
||||
std_pred_cols_0 = [ 'std_{}'.format(c) for c in p_pred_cols ]
|
||||
# p_pred_cols = [ c for c in p_pred_cols if not c in exclude ]
|
||||
std_pred_cols = [ 'std_{}'.format(c) for c in p_pred_cols ]
|
||||
df['log_price'] = df[target_col].apply(lambda x: np.log(x) )
|
||||
# df.sort_values('block_timestamp').head(10)[['price','tx_id']]
|
||||
# df.sort_values('block_timestamp').head(10)[['price','tx_id']].tx_id.values
|
||||
# df = df[df.price >= 1]
|
||||
std_pred_cols_0 = [ 'std_{}'.format(c) for c in pred_cols ]
|
||||
std_pred_cols = [ 'std_{}'.format(c) for c in pred_cols ]
|
||||
|
||||
#########################
|
||||
# Run the Model #
|
||||
#########################
|
||||
len(df)
|
||||
len(df.dropna(subset=std_pred_cols))
|
||||
tmp = df[std_pred_cols].count().reset_index()
|
||||
tmp.columns = ['a','b']
|
||||
tmp.sort_values('b').head(20)
|
||||
rem = list(tmp[tmp.b==0].a.values)
|
||||
std_pred_cols = [ c for c in std_pred_cols if not c in rem ]
|
||||
# if collection == 'Levana Dragon Eggs':
|
||||
# std_pred_cols = [ 'std_genus_Titan','std_score','std_weight','std_transformed_collection_rank','std_collection_rank','std_legendary_composition_None','std_ancient_composition_None' ]
|
||||
mn = df.timestamp.min()
|
||||
mx = df.timestamp.max()
|
||||
df['weight'] = df.timestamp.apply(lambda x: 2.5 ** ((x - mn) / (mx - mn)) )
|
||||
df['wt'] = df.timestamp.apply(lambda x: 2.5 ** ((x - mn) / (mx - mn)) )
|
||||
X = df[std_pred_cols].values
|
||||
mu = df.log_price.mean()
|
||||
sd = df.log_price.std()
|
||||
df['std_log_price'] = (df.log_price - mu) / sd
|
||||
# y = df.std_log_price.values
|
||||
# y = df[target_col].values
|
||||
# y = df.rel_price_1.values
|
||||
y_0 = df.rel_price_0.values
|
||||
y_1 = df.rel_price_1.values
|
||||
# y_log = df.log_price.values
|
||||
|
||||
clf_lin = RidgeCV(alphas=[1.5**x for x in range(20)])
|
||||
clf_lin.fit(X, y_0, df.weight.values)
|
||||
|
||||
# run the linear model
|
||||
clf_lin = Lasso() if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)])
|
||||
# clf_lin = RidgeCV(alphas=[1.5**x for x in range(20)])
|
||||
clf_lin.fit(X, y_0, df.wt.values)
|
||||
if collection == 'Levana Dragon Eggs':
|
||||
coefs = []
|
||||
for a, b in zip(std_pred_cols, clf_lin.coef_):
|
||||
coefs += [[a,b]]
|
||||
coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
|
||||
coefs.to_csv('~/Downloads/levana_lin_coefs.csv', index=False)
|
||||
df['pred_lin'] = clf_lin.predict(X)
|
||||
df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20
|
||||
df['err_lin'] = abs(((df.pred_lin - df[target_col]) / df[target_col]) )
|
||||
# df['err_lin'] = abs(df.pred_lin - df.price )
|
||||
# df[[ 'price','pred_lin','err_lin','mn_20' ]].sort_values('err_lin').tail(50)
|
||||
df.head()
|
||||
clf_log = RidgeCV(alphas=[1.5**x for x in range(20)])
|
||||
clf_log.fit(X, y_1, df.weight.values)
|
||||
|
||||
# run the log model
|
||||
clf_log = Lasso() if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)])
|
||||
# clf_log = RidgeCV(alphas=[1.5**x for x in range(20)])
|
||||
clf_log.fit(X, y_1, df.wt.values)
|
||||
if collection == 'Levana Dragon Eggs':
|
||||
coefs = []
|
||||
for a, b in zip(std_pred_cols, clf_lin.coef_):
|
||||
coefs += [[a,b]]
|
||||
coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
|
||||
coefs.to_csv('~/Downloads/levana_log_coefs.csv', index=False)
|
||||
df['pred_log'] = clf_log.predict(X)
|
||||
df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
|
||||
df['err_log'] = abs(((df.pred_log - df[target_col]) / df[target_col]) )
|
||||
df[[ target_col,'pred_log','err_log','mn_20' ]].sort_values('err_log').tail(50)
|
||||
|
||||
df['err'] = df.err_lin * df.err_log
|
||||
|
||||
df[[ target_col,'pred_log','err_log','err_lin','err','mn_20' ]].sort_values('err').tail(50)
|
||||
df['collection'] = collection
|
||||
excludedf = excludedf.append(df[df.err > 2][['collection','token_id','price']])
|
||||
# df = df[df.err < 2]
|
||||
|
||||
print(round(len(df[df.err > 2]) * 100.0 / len(df), 2))
|
||||
|
||||
df[(df.err_log > 1) & (df.err_lin >= 5)]
|
||||
|
||||
clf_log = RidgeCV(alphas=[1.5**x for x in range(20)])
|
||||
clf_log.fit(X, y_1, df.weight.values)
|
||||
|
||||
clf_log = RidgeCV(alphas=[1.5**x for x in range(20)])
|
||||
clf_log.fit(X, y_1, df.weight.values)
|
||||
df['pred_lin'] = clf_lin.predict(X)
|
||||
df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20
|
||||
# df['pred_log'] = np.exp(clf_log.predict(X))
|
||||
df['pred_log'] = clf_log.predict(X)
|
||||
df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
|
||||
# combine the models
|
||||
clf = LinearRegression(fit_intercept=False)
|
||||
clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.weight.values )
|
||||
clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values )
|
||||
print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) ))
|
||||
l = df.sort_values('block_timestamp', ascending=0).mn_20.values[0]
|
||||
tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], l]], columns=['collection','lin_coef','log_coef','floor_price'])
|
||||
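The hunk above ends by training two complementary regressions per collection: clf_lin predicts price minus the rolling floor (an absolute premium) and clf_log predicts price divided by the floor (a multiplier), and a final no-intercept LinearRegression learns how to blend the two predictions. A condensed, self-contained sketch of that stacking idea on synthetic data (sample weights and the Lasso-vs-RidgeCV switch from the diff are omitted here):

import numpy as np
from sklearn.linear_model import LinearRegression, RidgeCV

rng = np.random.default_rng(0)
n = 200
X = rng.normal(size=(n, 5))         # standardized trait features
floor = 10 + rng.random(n) * 2      # rolling floor (mn_20) at sale time
price = floor + np.exp(X[:, 0]) + rng.normal(scale=0.5, size=n)

# model 1: premium over the floor; model 2: ratio to the floor
clf_lin = RidgeCV(alphas=[1.5 ** x for x in range(20)])
clf_lin.fit(X, price - floor)
clf_log = RidgeCV(alphas=[1.5 ** x for x in range(20)])
clf_log.fit(X, price / floor)

pred_lin = np.maximum(0, clf_lin.predict(X)) + floor    # never predict below the floor
pred_log = np.maximum(1, clf_log.predict(X)) * floor

# blend the two with a no-intercept linear model, as the script does
blend = LinearRegression(fit_intercept=False)
blend.fit(np.column_stack([pred_lin, pred_log]), price)
print('Price = {:.2f} * lin + {:.2f} * log'.format(*blend.coef_))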
@@ -335,34 +292,8 @@ for collection in ['Levana Dragon Eggs']:
df['pred'] = clf.predict( df[['pred_lin','pred_log']].values )
coefsdf = coefsdf.append(tmp)
df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) )
df[df.block_timestamp>='2021-10-01'].sort_values('err', ascending=0).head(10)[[ 'pred',target_col,'token_id','block_timestamp','err','mn_20' ]]
# df[df.block_timestamp>='2021-10-01'].err.mean()
df.merge(tokens[['collection','token_id','clean_token_id']]).sort_values('err', ascending=0).head(10)[[ 'pred',target_col,'clean_token_id','rank','block_timestamp','err','mn_20','tx_id' ]]
df.sort_values('price', ascending=0).head(20)[[ 'price','pred',target_col,'token_id','block_timestamp','err','mn_20','tx_id' ]]
df.sort_values('price', ascending=0).tail(40)[[ 'price','pred',target_col,'token_id','block_timestamp','err','mn_20','tx_id' ]]
df.sort_values('price', ascending=0).head(20).tx_id.values

# print(np.mean(y))
# print(np.mean(clf.predict(X)))

# # run neural net
# model = tf.keras.models.Sequential([
# tf.keras.layers.Dense(9, activation='relu')
# , tf.keras.layers.Dropout(.2)
# , tf.keras.layers.Dense(3, activation='relu')
# , tf.keras.layers.Dropout(.2)
# , tf.keras.layers.Dense(1, activation='linear')
# ])
# model.compile(loss='mae', optimizer=tf.keras.optimizers.SGD(learning_rate=0.0025))
# model.fit(X, y, epochs=500, validation_split=0.3)

# df['pred'] = np.exp( (sd * model.predict(df[std_pred_cols].values)) + mu)
# df['pred'] = model.predict(df[std_pred_cols].values)
# ratio = df.price.mean() / df.pred.mean()
# print("Manually increasing predictions by {}%".format(round((ratio-1) * 100, 1)))

# checking errors
# df['pred'] = df.pred * ratio
# print out some summary stats
df['err'] = df[target_col] - df.pred
df['q'] = df.pred.rank() * 10 / len(df)
df['q'] = df.q.apply(lambda x: int(round(x)) )
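The err and q columns above feed a simple calibration check: predictions are bucketed into deciles and the mean error per bucket is printed, which shows whether the model systematically over- or under-prices cheap versus expensive tokens. A toy version of that check with synthetic numbers:

import numpy as np
import pandas as pd

df = pd.DataFrame({'pred': np.linspace(10, 100, 50)})
df['price'] = df.pred + np.random.default_rng(1).normal(scale=5, size=len(df))

df['err'] = df.price - df.pred
df['q'] = (df.pred.rank() * 10 / len(df)).apply(lambda x: int(round(x)))  # decile bucket
print(df.groupby('q')[['err', 'pred', 'price']].mean())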
@ -373,137 +304,56 @@ for collection in ['Levana Dragon Eggs']:
|
||||
df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) )
|
||||
df['pred_sd'] = df.pred * pe_sd
|
||||
print(df.groupby('q')[['err','pred',target_col]].mean())
|
||||
print(df[df.weight >= df.weight.median()].groupby('q')[['err','pred',target_col]].mean())
|
||||
print(df[df.wt >= df.wt.median()].groupby('q')[['err','pred',target_col]].mean())
|
||||
# df.err.mean()
|
||||
# df[df.weight >= 3.5].err.mean()
|
||||
df['collection'] = collection
|
||||
print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2)))
|
||||
salesdf = salesdf.append( df[[ 'collection','contract_address','token_id','block_timestamp','price','pred','mn_20','rank','score' ]].sort_values('block_timestamp', ascending=0) )
|
||||
salesdf = salesdf.append( df[[ 'collection','token_id','block_timestamp','price','pred','mn_20','rank' ]].sort_values('block_timestamp', ascending=0) )
|
||||
|
||||
# create the attributes dataframe
|
||||
for f in p_features:
|
||||
cur = p_metadata[[ 'token_id', f, '{}_pct'.format(f) ]]
|
||||
cur.columns = [ 'token_id', 'value','rarity' ]
|
||||
cur['feature'] = f
|
||||
cur['collection'] = collection
|
||||
attributes = attributes.append(cur)
|
||||
|
||||
# create predictions for each NFT in the collection
|
||||
test = p_metadata.copy()
|
||||
############################################################
|
||||
# Create Predictions for Each NFT in The Collection #
|
||||
############################################################
|
||||
test = merge(num_metadata, cat_metadata, ['collection','token_id'])
|
||||
for c in num_features:
|
||||
test[c] = test[c].apply(lambda x: just_float(x) )
|
||||
tail = df.sort_values('timestamp').tail(1)
|
||||
for c in [ 'std_timestamp','mn_20','log_mn_20' ]:
|
||||
if c in tail.columns:
|
||||
test[c] = tail[c].values[0]
|
||||
test = standardize_df(test, [c for c in p_pred_cols if not c in ['timestamp'] ], df)
|
||||
# test['pred_lin'] = clf_lin.predict( test[std_pred_cols].values )
|
||||
# test['pred_log'] = np.exp(clf_log.predict( test[std_pred_cols].values ))
|
||||
test = standardize_df(test, pred_cols, df)
|
||||
|
||||
test['pred_lin'] = clf_lin.predict(test[std_pred_cols].values)
|
||||
test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l)
|
||||
# test['pred_lin'] = df.pred_lin + df.mn_20
|
||||
# df['pred_log'] = np.exp(clf_log.predict(X))
|
||||
test['pred_log'] = clf_log.predict(test[std_pred_cols].values)
|
||||
test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l
|
||||
|
||||
test['pred'] = clf.predict( test[[ 'pred_lin','pred_log' ]].values )
|
||||
# test['pred'] = np.exp( (sd * model.predict(test[std_pred_cols].values)) + mu) * ratio
|
||||
test['pred_price'] = test.pred#.apply(lambda x: x*(1+pe_mu) )
|
||||
test['pred_price'] = clf.predict( test[[ 'pred_lin','pred_log' ]].values )
|
||||
if not CHECK_EXCLUDE:
|
||||
test['pred_price'] = test.pred.apply(lambda x: (x*0.985) )
|
||||
test['pred_sd'] = test.pred * pe_sd
|
||||
test['rk'] = test.pred.rank(ascending=0, method='first')
|
||||
test['pred_price'] = test.pred_price.apply(lambda x: (x*0.985) )
|
||||
test['pred_sd'] = test.pred_price * pe_sd
|
||||
test['rk'] = test.pred_price.rank(ascending=0, method='first')
|
||||
test['collection'] = collection
|
||||
pred_price = pred_price.append( test[[ 'collection', 'contract_address','token_id','rank','rk','pred_price','pred_sd' ] + p_features].rename(columns={'rank':'hri_rank'}).sort_values('pred_price') )
|
||||
# print(test[[ 'contract_address','token_id','pred_price','pred_sd' ]].sort_values('pred_price'))
|
||||
pred_price = pred_price.append( test[[ 'collection','token_id','rank','rk','pred_price','pred_sd' ]].sort_values('pred_price') )
|
||||
|
||||
cols = metadata.feature_name.unique()
|
||||
cols = [ x for x in cols if not x in (ATT_EXCLUDE_COLS[collection] if collection in ATT_EXCLUDE_COLS.keys() else []) + ALL_NUMERIC_COLS ]
|
||||
exclude = RARITY_EXCLUDE_COLS[collection] if collection in RARITY_EXCLUDE_COLS.keys() else []
|
||||
for c in cols:
|
||||
cur = metadata[metadata.feature_name == c][['collection','token_id','feature_name','feature_value']]
|
||||
if c in exclude:
|
||||
cur['rarity'] = None
|
||||
else:
|
||||
g = cur.groupby('feature_value').token_id.count().reset_index()
|
||||
g['rarity'] = g.token_id / len(cur.token_id.unique())
|
||||
cur = merge(cur, g[['feature_value','rarity']])
|
||||
attributes = attributes.append(cur)
|
||||
|
||||
##############################
|
||||
# Feature Importance #
|
||||
##############################
|
||||
coefs = []
|
||||
for a, b, c in zip(p_pred_cols, clf_lin.coef_, clf_log.coef_):
|
||||
coefs += [[ collection, a, b, c ]]
|
||||
coefs = pd.DataFrame(coefs, columns=['collection','col','lin_coef','log_coef'])
|
||||
# coefs['feature'] = coefs.col.apply(lambda x: ' '.join(re.split('_', x)[:-1]).title() )
|
||||
# coefs['feature'] = coefs.col.apply(lambda x: '_'.join(re.split('_', x)[:-1]) )
|
||||
# coefs['value'] = coefs.col.apply(lambda x: re.split('_', x)[-1] )
|
||||
# mn = coefs.groupby('feature')[[ 'lin_coef','log_coef' ]].min().reset_index()
|
||||
# mn.columns = [ 'feature','mn_lin_coef','mn_log_coef' ]
|
||||
# coefs = coefs.merge(mn)
|
||||
# coefs['lin_coef'] = coefs.lin_coef - coefs.mn_lin_coef
|
||||
# coefs['log_coef'] = coefs.log_coef - coefs.mn_log_coef
|
||||
# coefs
|
||||
# g = attributes[ attributes.collection == collection ][[ 'feature','value','rarity' ]].drop_duplicates()
|
||||
# g['value'] = g.value.astype(str)
|
||||
# len(coefs)
|
||||
# g = coefs.merge(g, how='left')
|
||||
# g[g.rarity.isnull()]
|
||||
# len(g)
|
||||
# coefs = coefs.merge( m_df[ m_df.collection == collection ][[ 'feature_name','' ]] )
|
||||
# coefs.sort_values('lin_coef').tail(20)
|
||||
|
||||
# TODO: pick the most common one and have that be the baseline
|
||||
most_common = attributes[(attributes.collection == collection)].sort_values('rarity', ascending=0).groupby('feature').head(1)
|
||||
most_common['col'] = most_common.apply(lambda x: 'std_{}_{}'.format( re.sub(' ', '_', x['feature'].lower()), x['value'] ), 1 )
|
||||
mc = most_common.col.unique()
|
||||
data = []
|
||||
for c0 in std_pred_cols_0:
|
||||
if c0 in ['std_rank','std_score','std_pct','std_timestamp','std_mn_20','std_log_mn_20']:
|
||||
continue
|
||||
f = '_'.join(re.split('_', c0)[1:-1])
|
||||
v = re.split('_', c0)[-1]
|
||||
rarity = p_metadata[p_metadata['{}_{}'.format(f, v)]==1]['{}_pct'.format(f)].values[0]
|
||||
# avg = p_metadata['{}_pct'.format(f)].mean()
|
||||
# avg_pct = df.pct.mean()
|
||||
# pct_std = ((avg_pct * r / avg) - avg_pct) / df.pct.std()
|
||||
r = df[df['{}_{}'.format(f, v)]==1].std_rank.mean()
|
||||
s = df[df['{}_{}'.format(f, v)]==1].std_score.mean()
|
||||
if r == r and s == s:
|
||||
datum = [ c0, rarity ]
|
||||
for c1 in std_pred_cols:
|
||||
datum.append(1 if c1 == c0 else r if c1 == 'std_rank' else s if c1 == 'std_score' else 1 if c1 in mc else 0 )
|
||||
data += [ datum ]
|
||||
|
||||
importance = pd.DataFrame(data, columns=['feature','rarity']+std_pred_cols)
|
||||
sorted(importance.feature.unique())
|
||||
importance[importance.feature == 'std_fur_/_skin_Leopard']
|
||||
if 'std_timestamp' in df.columns:
|
||||
importance['std_timestamp'] = df.std_timestamp.max()
|
||||
# importance['pred_lin'] = clf_lin.predict( importance[std_pred_cols].values )
|
||||
# importance['pred_log'] = np.exp(clf_log.predict( importance[std_pred_cols].values ))
|
||||
|
||||
importance['pred_lin'] = clf_lin.predict(importance[std_pred_cols].values)
|
||||
importance['pred_lin'] = importance.pred_lin.apply(lambda x: max(0, x) + l)
|
||||
# importance['pred_lin'] = importance.pred_lin.apply(lambda x: x + l)
|
||||
importance['pred_log'] = clf_log.predict(importance[std_pred_cols].values)
|
||||
importance['pred_log'] = importance.pred_log.apply(lambda x: max(1, x)) * l
|
||||
# importance['pred_log'] = importance.pred_log.apply(lambda x: x) * l
|
||||
|
||||
importance['pred'] = clf.predict( importance[[ 'pred_lin','pred_log' ]].values )
|
||||
# importance['pred'] = np.exp( (sd * model.predict(importance[std_pred_cols].values)) + mu)
|
||||
importance = importance.sort_values('pred', ascending=0)
|
||||
importance.head()[['feature','pred']]
|
||||
importance[importance.feature == 'std_fur_/_skin_Leopard']
|
||||
importance['feature'] = importance.feature.apply(lambda x: re.sub('std_', '', x))
|
||||
importance['value'] = importance.feature.apply(lambda x: re.split('_', x)[-1])
|
||||
importance['feature'] = importance.feature.apply(lambda x: '_'.join(re.split('_', x)[:-1]))
|
||||
mn = importance.groupby('feature').pred.min().reset_index().rename(columns={'pred':'baseline'})
|
||||
importance = importance.merge(mn)
|
||||
importance['pred_vs_baseline'] = importance.pred - importance.baseline
|
||||
importance['pct_vs_baseline'] = (importance.pred / importance.baseline) - 1
|
||||
importance[(importance.feature == 'fur_/_skin')].sort_values('pred')[['value','rarity','pred','pred_lin','pred_log','std_rank','std_score']].sort_values('rarity')
|
||||
importance['collection'] = collection
|
||||
importance.sort_values('pct_vs_baseline')[['feature','value','pct_vs_baseline']]
|
||||
tmp = importance[std_pred_cols].mean().reset_index()
|
||||
tmp.columns = [ 'a', 'b' ]
|
||||
tmp = tmp.sort_values('b')
|
||||
feature_values = feature_values.append(importance[['collection','feature','value','pred','pred_vs_baseline','pct_vs_baseline','rarity']])
|
||||
|
||||
attributes['feature'] = attributes.feature.apply(lambda x: re.sub('_', ' ', x).title() )
|
||||
feature_values['feature'] = feature_values.feature.apply(lambda x: re.sub('_', ' ', x).title() )
|
||||
|
||||
pred_price = pred_price[[ 'collection', 'contract_address', 'token_id', 'hri_rank', 'rk', 'pred_price', 'pred_sd' ]]
|
||||
|
||||
attributes['feature_name'] = attributes.feature_name.apply(lambda x: re.sub('_', ' ', x).title() )
|
||||
sorted(attributes['feature_name'].unique())
|
||||
if len(feature_values):
|
||||
feature_values['feature_name'] = feature_values.feature_name.apply(lambda x: re.sub('_', ' ', x).title() )
|
||||
|
||||
coefsdf.to_csv('./data/coefsdf.csv', index=False)
|
||||
salesdf.to_csv('./data/model_sales.csv', index=False)
|
||||
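The feature-importance step in the hunk above prices a synthetic token for every trait value, then compares each value against the cheapest value of the same trait: pred_vs_baseline is the absolute premium and pct_vs_baseline the relative one. A toy sketch of that baseline comparison (the predictions are invented):

import pandas as pd

importance = pd.DataFrame({'feature': ['hat', 'hat', 'hat', 'eyes', 'eyes'],
                           'value': ['None', 'Crown', 'Halo', 'Laser', 'Normal'],
                           'pred': [20.0, 55.0, 38.0, 31.0, 21.0]})  # predicted price with that trait value

# baseline = cheapest value within each trait
mn = importance.groupby('feature').pred.min().reset_index().rename(columns={'pred': 'baseline'})
importance = importance.merge(mn)
importance['pred_vs_baseline'] = importance.pred - importance.baseline
importance['pct_vs_baseline'] = (importance.pred / importance.baseline) - 1
print(importance.sort_values('pct_vs_baseline', ascending=False))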
@@ -511,24 +361,6 @@ pred_price.to_csv('./data/pred_price.csv', index=False)
attributes.to_csv('./data/attributes.csv', index=False)
feature_values.to_csv('./data/feature_values.csv', index=False)

pred_price = pd.read_csv('./data/pred_price.csv')
tokens = pd.read_csv('./data/tokens.csv')
rem = tokens[tokens.clean_token_id>=10000].token_id.unique()
l0 = len(pred_price)
pred_price = pred_price[ -((pred_price.collection == 'LunaBulls') & (pred_price.token_id.isin(rem))) ]
l1 = len(pred_price)
pred_price.to_csv('./data/pred_price.csv', index=False)

# excludedf.to_csv('./data/excludedf.csv', index=False)
# listings = pd.read_csv('./data/listings.csv')
# listings['token_id'] = listings.token_id.astype(int)

# tmp = salesdf.merge(attributes[ (attributes.collection == 'thugbirdz') & (attributes.feature == 'Position In Gang') & (attributes.value == 'Underboss') ])
# tmp = pred_price.merge(attributes[ (attributes.collection == 'thugbirdz') & (attributes.feature == 'Position In Gang') & (attributes.value == 'Underboss') ])
# tmp['token_id'] = tmp.token_id.astype(int)
# tmp = tmp.merge(listings[['collection','token_id','price']])
# tmp.sort_values('pred_price', ascending=0)

if CHECK_EXCLUDE:
salesdf['rat'] = salesdf.price / salesdf.pred
salesdf['dff'] = salesdf.price - salesdf.pred
@@ -542,9 +374,3 @@ if CHECK_EXCLUDE:
print(salesdf.exclude.mean())
salesdf[salesdf.token_id == '2239'][['collection','price','exclude']]
salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False)

attributes[ (attributes.collection == 'thugbirdz') & (attributes.token_id == '1869') ]
feature_values[ (feature_values.collection == 'thugbirdz') & (feature_values.feature == 'position_in_gang') ]
sorted(feature_values[ (feature_values.collection == 'thugbirdz') ].feature.unique())

pred_price[pred_price.collection == 'peskypenguinclub'].head()
solana_model_old.py — 574 lines (new file)

@@ -0,0 +1,574 @@
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import warnings
|
||||
import requests
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import urllib.request
|
||||
import tensorflow as tf
|
||||
import snowflake.connector
|
||||
from datetime import datetime
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.linear_model import LinearRegression, RidgeCV, Lasso
|
||||
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV
|
||||
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
|
||||
|
||||
CHECK_EXCLUDE = False
|
||||
CHECK_EXCLUDE = True
|
||||
|
||||
# Using sales from howrare.is - the last sale that was under 300 was when the floor was at 72. Filtering for when the floor is >100, the lowest sale was 400
|
||||
|
||||
###################################
|
||||
# Define Helper Functions #
|
||||
###################################
|
||||
def standardize_df(df, cols, usedf=None, verbose=False):
|
||||
for c in cols:
|
||||
if type(usedf) != type(pd.DataFrame()):
|
||||
usedf = df
|
||||
mu = usedf[c].mean()
|
||||
sd = usedf[c].std()
|
||||
if verbose:
|
||||
print(c)
|
||||
if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0:
|
||||
df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 )
|
||||
else:
|
||||
df['std_{}'.format(c)] = (df[c] - mu) / sd
|
||||
return(df)
|
||||
|
||||
def just_float(x):
|
||||
x = re.sub('[^\d\.]', '', str(x))
|
||||
return(float(x))
|
||||
|
||||
def calculate_percentages(df, cols=[]):
|
||||
add_pct = not 'pct' in df.columns
|
||||
if not len(cols):
|
||||
cols = df.columns
|
||||
if add_pct:
|
||||
df['pct'] = 1
|
||||
for c in cols:
|
||||
g = df[c].value_counts().reset_index()
|
||||
g.columns = [ c, 'N' ]
|
||||
col = '{}_pct'.format(c)
|
||||
g[col] = g.N / g.N.sum()
|
||||
df = df.merge( g[[ c, col ]] )
|
||||
if add_pct:
|
||||
df['pct'] = df.pct * df[col]
|
||||
return(df)
|
||||
|
||||
exclude = [
|
||||
# (collection, token_id, price)
|
||||
( 'aurory', 2239, 3500 )
|
||||
# ( 'aurory', 856, 150 )
|
||||
# ( 'aurory', 4715, 500 )
|
||||
# ( 'aurory', 5561, 298 )
|
||||
# ( 'aurory', 5900, 199 )
|
||||
# ( 'aurory', 3323, 138 )
|
||||
]
|
||||
s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'})
|
||||
s_df[ s_df.collection == 'Levana Dragons' ].sort_values('block_timestamp', ascending=0).head()
|
||||
print(len(s_df[s_df.collection == 'Levana Dragon Eggs']))
|
||||
print(s_df.groupby('collection').token_id.count())
|
||||
s_df.collection.unique()
|
||||
s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])]
|
||||
s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]]
|
||||
s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ]
|
||||
for e in exclude:
|
||||
s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )]
|
||||
s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ]
|
||||
|
||||
# exclude weird data points
|
||||
if not CHECK_EXCLUDE:
|
||||
exclude = pd.read_csv('./data/exclude.csv')
|
||||
s_df = s_df.merge(exclude, how='left')
|
||||
s_df = s_df[s_df.exclude.isnull()]
|
||||
del s_df['exclude']
|
||||
|
||||
m_df = pd.read_csv('./data/metadata.csv')
|
||||
m_df['token_id'] = m_df.token_id.astype(str)
|
||||
tmp = m_df[m_df.collection.isin(['Levana Dragon Eggs','Levana Meteors','Levana Dust'])]
|
||||
tmp['tmp'] = tmp.token_id.astype(int)
|
||||
tmp.groupby('collection').tmp.max()
|
||||
m_df.head()
|
||||
# s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(x[:10], '%Y-%m-%d %H:%M:%S') )
|
||||
s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') )
|
||||
s_df['timestamp'] = s_df.block_timestamp.astype(int)
|
||||
# del metadata['price']
|
||||
# del metadata['last_sale']
|
||||
s_df = s_df.sort_values(['collection','block_timestamp'])
|
||||
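# rolling floor proxy per collection: md_20 is a low quantile of the previous 20 sale prices
# (sales below ~75% of it are dropped as outliers), then mn_20 is rebuilt as the 10th
# percentile of the prior 20 remaining sale prices.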
s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
|
||||
s_df = s_df.sort_values(['collection','block_timestamp'])
|
||||
s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int)
|
||||
s_df[[ 'block_timestamp','days_ago' ]].drop_duplicates(subset=['days_ago'])
|
||||
|
||||
s_df['av_20'] = s_df.groupby('collection')['mn_20'].rolling(20).mean().reset_index(0,drop=True)
|
||||
s_df = s_df.sort_values(['collection','block_timestamp'])
|
||||
# s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).median().reset_index(0,drop=True)
|
||||
s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True)
|
||||
# s_df[ (-((s_df.price) >= (s_df.md_20 * 0.2))) & (s_df.price.notnull()) & (s_df.collection == 'Levana Dragon Eggs') ]
|
||||
|
||||
s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.75) ]
|
||||
s_df = s_df.sort_values(['collection','block_timestamp'])
|
||||
s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
|
||||
s_df = s_df.sort_values(['collection','block_timestamp'])
|
||||
# s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).min().reset_index(0,drop=True)
|
||||
s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True)
|
||||
s_df.sort_values(['collection','block_timestamp'])[['price','mn_20','block_timestamp']].head(21).tail(40)
|
||||
s_df.sort_values(['collection','block_timestamp'])[['price','mn_20','block_timestamp']].head(20).sort_values('price')
|
||||
s_df['tmp'] = s_df.mn_20 / s_df.md_20
|
||||
|
||||
tmp = s_df[s_df.collection=='smb'][['mn_20','block_timestamp']]
|
||||
tmp['date'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] )
|
||||
tmp = tmp.groupby('date').mn_20.median().reset_index()
|
||||
tmp.to_csv('~/Downloads/tmp.csv', index=False)
|
||||
|
||||
s_df['tmp'] = s_df.price / s_df.mn_20
|
||||
s_df[s_df.collection == 'smb'].sort_values('block_timestamp')[['token_id','price','mn_20']]
|
||||
s_df[s_df.collection == 'smb'].sort_values('tmp').head(20)[['collection','token_id','price','mn_20','tmp']]
|
||||
s_df.groupby('collection').tmp.median()
|
||||
s_df.groupby('collection').tmp.mean()
|
||||
|
||||
s_df.sort_values('tmp').head()
|
||||
s_df['tmp'] = s_df.price / s_df.mn_20
|
||||
s_df[['collection','token_id','block_timestamp','price','mn_20','md_20','av_20','tmp']].to_csv('~/Downloads/tmp.csv', index=False)
|
||||
s_df.groupby('collection').tmp.median()
|
||||
s_df.groupby('collection').tmp.mean()
|
||||
s_df.sort_values('tmp', ascending=0).head()
|
||||
s_df.head(21)
|
||||
m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ]
|
||||
# m_df['feature_value'] = m_df.feature_value.apply(lambda x: x.strip() )
|
||||
# m_df.feature_value.unique()
|
||||
pred_cols = {}
|
||||
metadata = {}
|
||||
sales = {}
|
||||
collection_features = {}
|
||||
m_df[(m_df.collection == 'Galactic Punks') & (m_df.feature_name == 'pct')].sort_values('token_id')
|
||||
c = 'Levana Dragon Eggs'
|
||||
# pred_cols[c]
|
||||
EXCLUDE_COLS = {
|
||||
# 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
|
||||
'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count']
|
||||
}
|
||||
NUMERIC_COLS = {
|
||||
'Levana Dragon Eggs': ['rank','score','pct','collection_rank','weight','temperature']
|
||||
}
|
||||
for c in s_df.collection.unique():
|
||||
print('Building {} model'.format(c))
|
||||
exclude = EXCLUDE_COLS[c] if c in EXCLUDE_COLS.keys() else []
|
||||
n_cols = NUMERIC_COLS[c] if c in NUMERIC_COLS.keys() else []
|
||||
exclude = [ x for x in exclude if not x in n_cols ]
|
||||
o_cols = sorted([x for x in m_df[ m_df.collection == c ].feature_name.unique() if (not x in exclude) and not (x in n_cols) ])
|
||||
|
||||
sales[c] = s_df[ s_df.collection == c ]
|
||||
pred_cols[c] = sorted( n_cols + o_cols )
|
||||
collection_features[c] = [ c for c in pred_cols[c] if not c in ['score','rank','pct']+exclude ]
|
||||
metadata[c] = m_df[ (m_df.collection == c) & (-(m_df.feature_name.isin(exclude))) ]
|
||||
|
||||
# tmp = pd.pivot_table( metadata[c], ['collection','token_id'], columns=['feature_name'], values=['feature_value'] )
|
||||
metadata[c] = metadata[c].pivot( index=['collection','token_id'], columns=['feature_name'], values=['feature_value'] ).reset_index()
|
||||
metadata[c].columns = [ 'collection','token_id' ] + pred_cols[c]
|
||||
|
||||
features = collection_features[c]
|
||||
cur = metadata[c]
|
||||
# cur = cur.dropna(subset=features)
|
||||
for f in features:
|
||||
if isinstance(cur[f].values[0], str):
cur[f] = cur[f].apply(lambda x: re.sub("\"", "", str(x) ) )
cur[f] = cur[f].apply(lambda x: re.split(r"\(", x )[0].strip())
|
||||
cur = cur.replace('', 'Default')
|
||||
# if not 'pct' in cur.columns:
|
||||
cur = calculate_percentages( cur, o_cols )
|
||||
dummies = pd.get_dummies(cur[o_cols])
|
||||
# feature_cols = dummies.columns
|
||||
cur = pd.concat([ cur.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
|
||||
metadata[c] = cur
|
||||
# pred_cols[c] = ['rank','score','timestamp','mn_20','log_mn_20'] + list(dummies.columns)
|
||||
# cols = [ 'collection_rank' ]
|
||||
# cols = [ ]
|
||||
# pred_cols[c] = [ 'rank','transform_rank','score'] + n_cols + [x for x in cols if x in m_df.feature_name.unique()] + list(dummies.columns)
|
||||
# pred_cols[c] = [ 'rank','transform_rank','score'] + n_cols + list(dummies.columns)
|
||||
pred_cols[c] = n_cols + list(dummies.columns)
|
||||
|
||||
# collection_features = {
|
||||
# 'Hashmasks': [ 'character','eyecolor','item','mask','skincolor' ]
|
||||
# , 'Galactic Punks': [ 'backgrounds','hair','species','suits','jewelry','headware','glasses' ]
|
||||
# , 'Solana Monkey Business': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
|
||||
# , 'Aurory': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
|
||||
# # , 'Thugbirdz': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
|
||||
# }
|
||||
|
||||
coefsdf = pd.DataFrame()
|
||||
salesdf = pd.DataFrame()
|
||||
attributes = pd.DataFrame()
|
||||
pred_price = pd.DataFrame()
|
||||
feature_values = pd.DataFrame()
|
||||
collections = sorted(metadata.keys())
|
||||
collection = 'Galactic Punks'
|
||||
tokens = pd.read_csv('./data/tokens.csv')
|
||||
collection = 'Levana Dragon Eggs'
|
||||
# for collection in s_df.collection.unique():
|
||||
for collection in ['Levana Dragon Eggs']:
|
||||
# collection = 'LunaBulls'
|
||||
# collection = 'smb'
|
||||
# collection = 'aurory'
|
||||
# collection = 'meerkatmillionaires'
|
||||
print('Working on collection {}'.format(collection))
|
||||
p_metadata = metadata[collection]
|
||||
if 'attribute_count' in p_metadata.columns:
|
||||
p_metadata['attribute_count'] = p_metadata.attribute_count.astype(float).astype(int)
|
||||
|
||||
p_sales = sales[collection]
|
||||
# specify the predictive features
|
||||
p_pred_cols = pred_cols[collection]
|
||||
if collection == 'Levana Dragon Eggs':
|
||||
p_pred_cols += [ 'transformed_collection_rank' ]
|
||||
p_features = collection_features[collection]
|
||||
p_sales['token_id'] = p_sales.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
|
||||
p_metadata['token_id'] = p_metadata.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
|
||||
for c in [ 'rank','score' ]:
|
||||
p_metadata[c] = p_metadata[c].astype(float)
|
||||
# p_sales['contract_address'] = p_sales.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
|
||||
# p_metadata['contract_address'] = p_metadata.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
|
||||
p_sales['contract_address'] = ''
|
||||
p_metadata['contract_address'] = ''
|
||||
|
||||
# remove 1 column from each group (since they are collinear)
|
||||
# exclude = []
|
||||
# for f in p_features:
|
||||
# e = [ c for c in p_pred_cols if c[:len(f)] == f ][-1]
|
||||
# exclude.append(e)
|
||||
|
||||
df = p_sales.merge(p_metadata, on=['token_id','contract_address'])
|
||||
df = df[df.mn_20.notnull()]
|
||||
target_col = 'adj_price'
|
||||
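# adjusted target: the sale price is floored at 70% of (mn_20 - 0.2), so extreme
# below-floor sales do not dominate the fit.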
df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 )
|
||||
# df['mn_20'] = df.apply(lambda x: min(x[target_col], x['mn_20']), 1 )
|
||||
# tmp = df[['block_timestamp','mn_20']].copy()
|
||||
# tmp['tmp'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] )
|
||||
# tmp = tmp.groupby('tmp').mn_20.median().reset_index()
|
||||
# tmp.sort_values('tmp').to_csv('~/Downloads/tmp.csv', index=False)
|
||||
# df['timestamp'] = df.block_timestamp.astype(int)
|
||||
df = df[df[target_col].notnull()]
|
||||
df = df.reset_index(drop=True)
|
||||
df['transform_rank'] = df['rank'].apply(lambda x: 1.0 / (x**2) )
|
||||
df['rel_price_0'] = df[target_col] - df.mn_20
|
||||
df['rel_price_1'] = df[target_col] / df.mn_20
|
||||
df = df[df.mn_20 > 0]
|
||||
df['log_mn_20'] = np.log(df.mn_20)
|
||||
print('Training on {} sales'.format(len(df)))
|
||||
# df['price_median'] = df.groupby('token_id').price.median()
|
||||
|
||||
# standardize columns to mean 0 sd 1
|
||||
len(p_pred_cols)
|
||||
n_cols = NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []
|
||||
for c in n_cols:
|
||||
df[c] = df[c].apply(lambda x: just_float(x) )
|
||||
if collection == 'Levana Dragon Eggs':
|
||||
df['transformed_collection_rank'] = df.collection_rank.apply(lambda x: (1.0/ x)**2 )
|
||||
df = standardize_df(df, p_pred_cols)
|
||||
std_pred_cols_0 = [ 'std_{}'.format(c) for c in p_pred_cols ]
|
||||
# p_pred_cols = [ c for c in p_pred_cols if not c in exclude ]
|
||||
std_pred_cols = [ 'std_{}'.format(c) for c in p_pred_cols ]
|
||||
df['log_price'] = df[target_col].apply(lambda x: np.log(x) )
|
||||
# df.sort_values('block_timestamp').head(10)[['price','tx_id']]
|
||||
# df.sort_values('block_timestamp').head(10)[['price','tx_id']].tx_id.values
|
||||
# df = df[df.price >= 1]
|
||||
|
||||
#########################
|
||||
# Run the Model #
|
||||
#########################
|
||||
len(df)
|
||||
len(df.dropna(subset=std_pred_cols))
|
||||
tmp = df[std_pred_cols].count().reset_index()
|
||||
tmp.columns = ['a','b']
|
||||
tmp.sort_values('b').head(20)
|
||||
rem = list(tmp[tmp.b==0].a.values)
|
||||
std_pred_cols = [ c for c in std_pred_cols if not c in rem ]
|
||||
mn = df.timestamp.min()
|
||||
mx = df.timestamp.max()
|
||||
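# exponential recency weight: the most recent sale counts 2.5x as much as the oldest one.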
df['weight'] = df.timestamp.apply(lambda x: 2.5 ** ((x - mn) / (mx - mn)) )
|
||||
X = df[std_pred_cols].values
|
||||
mu = df.log_price.mean()
|
||||
sd = df.log_price.std()
|
||||
df['std_log_price'] = (df.log_price - mu) / sd
|
||||
# y = df.std_log_price.values
|
||||
# y = df[target_col].values
|
||||
# y = df.rel_price_1.values
|
||||
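# two targets: the sale's absolute premium over the floor (rel_price_0) and its
# multiplicative premium over the floor (rel_price_1).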
y_0 = df.rel_price_0.values
|
||||
y_1 = df.rel_price_1.values
|
||||
# y_log = df.log_price.values
|
||||
|
||||
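# additive model on the absolute premium: Lasso for Levana Dragon Eggs (many sparse dummies),
# otherwise RidgeCV over a geometric grid of alphas; sample weights favor recent sales.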
clf_lin = Lasso() if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)])
|
||||
clf_lin.fit(X, y_0, df.weight.values)
|
||||
coefs = []
|
||||
for a, b in zip(std_pred_cols, clf_lin.coef_):
|
||||
coefs += [[a,b]]
|
||||
coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
|
||||
coefs.to_csv('~/Downloads/tmp.csv', index=False)
|
||||
df['pred_lin'] = clf_lin.predict(X)
|
||||
df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20
|
||||
df['err_lin'] = abs(((df.pred_lin - df[target_col]) / df[target_col]) )
|
||||
# df['err_lin'] = abs(df.pred_lin - df.price )
|
||||
# df[[ 'price','pred_lin','err_lin','mn_20' ]].sort_values('err_lin').tail(50)
|
||||
df.head()
|
||||
clf_log = Lasso() if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)])
|
||||
clf_log.fit(X, y_1, df.weight.values)
|
||||
coefs = []
|
||||
for a, b in zip(std_pred_cols, clf_log.coef_):
|
||||
coefs += [[a,b]]
|
||||
coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
|
||||
coefs.to_csv('~/Downloads/tmp.csv', index=False)
|
||||
df['pred_log'] = clf_log.predict(X)
|
||||
df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
|
||||
df['err_log'] = abs(((df.pred_log - df[target_col]) / df[target_col]) )
|
||||
df[[ target_col,'pred_log','err_log','mn_20' ]].sort_values('err_log').tail(50)
|
||||
df['err'] = df.err_lin * df.err_log
|
||||
|
||||
df[[ target_col,'pred_log','err_log','err_lin','err','mn_20' ]].sort_values('err').tail(50)
|
||||
df['collection'] = collection
|
||||
|
||||
# df['pred_lin'] = clf_lin.predict(X)
|
||||
# df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20
|
||||
# df['pred_log'] = np.exp(clf_log.predict(X))
|
||||
# df['pred_log'] = clf_log.predict(X)
|
||||
# df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
|
||||
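# blend the additive (pred_lin) and multiplicative (pred_log) predictions with a no-intercept
# linear regression; if either blend weight comes out negative, fall back to the other model alone.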
clf = LinearRegression(fit_intercept=False)
|
||||
clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.weight.values )
|
||||
print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) ))
|
||||
l = df.sort_values('block_timestamp', ascending=0).mn_20.values[0]
|
||||
tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], l]], columns=['collection','lin_coef','log_coef','floor_price'])
|
||||
if clf.coef_[0] < 0:
|
||||
print('Only using log')
|
||||
df['pred'] = df.pred_log
|
||||
tmp['lin_coef'] = 0
|
||||
tmp['log_coef'] = 1
|
||||
elif clf.coef_[1] < 0:
|
||||
print('Only using lin')
|
||||
df['pred'] = df.pred_lin
|
||||
tmp['lin_coef'] = 1
|
||||
tmp['log_coef'] = 0
|
||||
else:
|
||||
print('Using BOTH!')
|
||||
df['pred'] = clf.predict( df[['pred_lin','pred_log']].values )
|
||||
coefsdf = coefsdf.append(tmp)
|
||||
df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) )
|
||||
df[df.block_timestamp>='2021-10-01'].sort_values('err', ascending=0).head(10)[[ 'pred',target_col,'token_id','block_timestamp','err','mn_20' ]]
|
||||
# df[df.block_timestamp>='2021-10-01'].err.mean()
|
||||
df.merge(tokens[['collection','token_id','clean_token_id']]).sort_values('err', ascending=0).head(10)[[ 'pred',target_col,'clean_token_id','rank','block_timestamp','err','mn_20','tx_id' ]]
|
||||
df.sort_values('price', ascending=0).head(20)[[ 'price','pred',target_col,'token_id','block_timestamp','err','mn_20','tx_id' ]]
|
||||
df.sort_values('price', ascending=0).tail(40)[[ 'price','pred',target_col,'token_id','block_timestamp','err','mn_20','tx_id' ]]
|
||||
df.sort_values('price', ascending=0).head(20).tx_id.values
|
||||
|
||||
# print(np.mean(y))
|
||||
# print(np.mean(clf.predict(X)))
|
||||
|
||||
# # run neural net
|
||||
# model = tf.keras.models.Sequential([
|
||||
# tf.keras.layers.Dense(9, activation='relu')
|
||||
# , tf.keras.layers.Dropout(.2)
|
||||
# , tf.keras.layers.Dense(3, activation='relu')
|
||||
# , tf.keras.layers.Dropout(.2)
|
||||
# , tf.keras.layers.Dense(1, activation='linear')
|
||||
# ])
|
||||
# model.compile(loss='mae', optimizer=tf.keras.optimizers.SGD(learning_rate=0.0025))
|
||||
# model.fit(X, y, epochs=500, validation_split=0.3)
|
||||
|
||||
# df['pred'] = np.exp( (sd * model.predict(df[std_pred_cols].values)) + mu)
|
||||
# df['pred'] = model.predict(df[std_pred_cols].values)
|
||||
# ratio = df.price.mean() / df.pred.mean()
|
||||
# print("Manually increasing predictions by {}%".format(round((ratio-1) * 100, 1)))
|
||||
|
||||
# checking errors
|
||||
# df['pred'] = df.pred * ratio
|
||||
df['err'] = df[target_col] - df.pred
|
||||
df['q'] = df.pred.rank() * 10 / len(df)
|
||||
df['q'] = df.q.apply(lambda x: int(round(x)) )
|
||||
df['pct_err'] = (df[target_col] / df.pred) - 1
|
||||
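# prediction uncertainty: the sd of percentage errors (clipped to +/-90%, recent sales only)
# is used below to scale each token's pred_sd.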
pe_mu = df.pct_err.mean()
|
||||
pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) ].pct_err.std()
|
||||
pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) & (df.days_ago<=50) ].pct_err.std()
|
||||
df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) )
|
||||
df['pred_sd'] = df.pred * pe_sd
|
||||
print(df.groupby('q')[['err','pred',target_col]].mean())
|
||||
print(df[df.weight >= df.weight.median()].groupby('q')[['err','pred',target_col]].mean())
|
||||
# df.err.mean()
|
||||
# df[df.weight >= 3.5].err.mean()
|
||||
df['collection'] = collection
|
||||
print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2)))
|
||||
salesdf = salesdf.append( df[[ 'collection','contract_address','token_id','block_timestamp','price','pred','mn_20','rank','score' ]].sort_values('block_timestamp', ascending=0) )
|
||||
|
||||
# create the attributes dataframe
|
||||
for f in p_features:
|
||||
if f and '{}_pct'.format(f) in p_metadata.columns:
|
||||
cur = p_metadata[[ 'token_id', f, '{}_pct'.format(f) ]]
|
||||
cur.columns = [ 'token_id', 'value','rarity' ]
|
||||
cur['feature'] = f
|
||||
cur['collection'] = collection
|
||||
attributes = attributes.append(cur)
|
||||
|
||||
# create predictions for each NFT in the collection
|
||||
test = p_metadata.copy()
|
||||
for c in n_cols:
|
||||
test[c] = test[c].apply(lambda x: just_float(x) )
|
||||
if collection in [ 'Levana Dragon Eggs' ]:
|
||||
test['transformed_collection_rank'] = test.collection_rank.apply(lambda x: (1.0 / x) ** 2 )
|
||||
tail = df.sort_values('timestamp').tail(1)
|
||||
for c in [ 'std_timestamp','mn_20','log_mn_20' ]:
|
||||
if c in tail.columns:
|
||||
test[c] = tail[c].values[0]
|
||||
test = standardize_df(test, [c for c in p_pred_cols if not c in ['timestamp'] ], df, True)
|
||||
# test['pred_lin'] = clf_lin.predict( test[std_pred_cols].values )
|
||||
# test['pred_log'] = np.exp(clf_log.predict( test[std_pred_cols].values ))
|
||||
|
||||
test['pred_lin'] = clf_lin.predict(test[std_pred_cols].values)
|
||||
test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l)
|
||||
# test['pred_lin'] = df.pred_lin + df.mn_20
|
||||
# df['pred_log'] = np.exp(clf_log.predict(X))
|
||||
test['pred_log'] = clf_log.predict(test[std_pred_cols].values)
|
||||
test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l
|
||||
|
||||
test['pred'] = clf.predict( test[[ 'pred_lin','pred_log' ]].values )
|
||||
# test['pred'] = np.exp( (sd * model.predict(test[std_pred_cols].values)) + mu) * ratio
|
||||
test['pred_price'] = test.pred#.apply(lambda x: x*(1+pe_mu) )
|
||||
if not CHECK_EXCLUDE:
|
||||
test['pred_price'] = test.pred.apply(lambda x: (x*0.985) )
|
||||
test['pred_sd'] = test.pred * pe_sd
|
||||
test['rk'] = test.pred.rank(ascending=0, method='first')
|
||||
test['collection'] = collection
|
||||
pred_price = pred_price.append( test[[ 'collection', 'contract_address','token_id','rank','rk','pred_price','pred_sd' ] + p_features].rename(columns={'rank':'hri_rank'}).sort_values('pred_price') )
|
||||
# print(test[[ 'contract_address','token_id','pred_price','pred_sd' ]].sort_values('pred_price'))
|
||||
|
||||
|
||||
##############################
|
||||
# Feature Importance #
|
||||
##############################
|
||||
coefs = []
|
||||
for a, b, c in zip(p_pred_cols, clf_lin.coef_, clf_log.coef_):
|
||||
coefs += [[ collection, a, b, c ]]
|
||||
coefs = pd.DataFrame(coefs, columns=['collection','col','lin_coef','log_coef'])
|
||||
# coefs['feature'] = coefs.col.apply(lambda x: ' '.join(re.split('_', x)[:-1]).title() )
|
||||
# coefs['feature'] = coefs.col.apply(lambda x: '_'.join(re.split('_', x)[:-1]) )
|
||||
# coefs['value'] = coefs.col.apply(lambda x: re.split('_', x)[-1] )
|
||||
# mn = coefs.groupby('feature')[[ 'lin_coef','log_coef' ]].min().reset_index()
|
||||
# mn.columns = [ 'feature','mn_lin_coef','mn_log_coef' ]
|
||||
# coefs = coefs.merge(mn)
|
||||
# coefs['lin_coef'] = coefs.lin_coef - coefs.mn_lin_coef
|
||||
# coefs['log_coef'] = coefs.log_coef - coefs.mn_log_coef
|
||||
# coefs
|
||||
# g = attributes[ attributes.collection == collection ][[ 'feature','value','rarity' ]].drop_duplicates()
|
||||
# g['value'] = g.value.astype(str)
|
||||
# len(coefs)
|
||||
# g = coefs.merge(g, how='left')
|
||||
# g[g.rarity.isnull()]
|
||||
# len(g)
|
||||
# coefs = coefs.merge( m_df[ m_df.collection == collection ][[ 'feature_name','' ]] )
|
||||
# coefs.sort_values('lin_coef').tail(20)
|
||||
|
||||
# TODO: pick the most common one and have that be the baseline
|
||||
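# feature importance: build one synthetic token per trait value - that trait's dummy set to 1,
# std_rank/std_score set to the average of real tokens holding the trait, and (roughly) every
# other feature held at its most common value - then price it with the fitted models.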
most_common = attributes[(attributes.collection == collection)].sort_values('rarity', ascending=0).groupby('feature').head(1)
|
||||
most_common['col'] = most_common.apply(lambda x: 'std_{}_{}'.format( re.sub(' ', '_', x['feature'].lower()), x['value'] ), 1 )
|
||||
mc = most_common.col.unique()
|
||||
data = []
|
||||
for c0 in std_pred_cols_0:
|
||||
if c0 in ['std_rank','std_score','std_pct','std_timestamp','std_mn_20','std_log_mn_20']:
|
||||
continue
|
||||
f = '_'.join(re.split('_', c0)[1:-1])
|
||||
v = re.split('_', c0)[-1]
|
||||
rarity = p_metadata[p_metadata['{}_{}'.format(f, v)]==1]['{}_pct'.format(f)].values[0]
|
||||
# avg = p_metadata['{}_pct'.format(f)].mean()
|
||||
# avg_pct = df.pct.mean()
|
||||
# pct_std = ((avg_pct * r / avg) - avg_pct) / df.pct.std()
|
||||
r = df[df['{}_{}'.format(f, v)]==1].std_rank.mean()
|
||||
s = df[df['{}_{}'.format(f, v)]==1].std_score.mean()
|
||||
if r == r and s == s:
|
||||
datum = [ c0, rarity ]
|
||||
for c1 in std_pred_cols:
|
||||
datum.append(1 if c1 == c0 else r if c1 == 'std_rank' else s if c1 == 'std_score' else 1 if c1 in mc else 0 )
|
||||
data += [ datum ]
|
||||
|
||||
importance = pd.DataFrame(data, columns=['feature','rarity']+std_pred_cols)
|
||||
sorted(importance.feature.unique())
|
||||
importance[importance.feature == 'std_fur_/_skin_Leopard']
|
||||
if 'std_timestamp' in df.columns:
|
||||
importance['std_timestamp'] = df.std_timestamp.max()
|
||||
# importance['pred_lin'] = clf_lin.predict( importance[std_pred_cols].values )
|
||||
# importance['pred_log'] = np.exp(clf_log.predict( importance[std_pred_cols].values ))
|
||||
|
||||
importance['pred_lin'] = clf_lin.predict(importance[std_pred_cols].values)
|
||||
importance['pred_lin'] = importance.pred_lin.apply(lambda x: max(0, x) + l)
|
||||
# importance['pred_lin'] = importance.pred_lin.apply(lambda x: x + l)
|
||||
importance['pred_log'] = clf_log.predict(importance[std_pred_cols].values)
|
||||
importance['pred_log'] = importance.pred_log.apply(lambda x: max(1, x)) * l
|
||||
# importance['pred_log'] = importance.pred_log.apply(lambda x: x) * l
|
||||
|
||||
importance['pred'] = clf.predict( importance[[ 'pred_lin','pred_log' ]].values )
|
||||
# importance['pred'] = np.exp( (sd * model.predict(importance[std_pred_cols].values)) + mu)
|
||||
importance = importance.sort_values('pred', ascending=0)
|
||||
importance.head()[['feature','pred']]
|
||||
importance[importance.feature == 'std_fur_/_skin_Leopard']
|
||||
importance['feature'] = importance.feature.apply(lambda x: re.sub('std_', '', x))
|
||||
importance['value'] = importance.feature.apply(lambda x: re.split('_', x)[-1])
|
||||
importance['feature'] = importance.feature.apply(lambda x: '_'.join(re.split('_', x)[:-1]))
|
||||
mn = importance.groupby('feature').pred.min().reset_index().rename(columns={'pred':'baseline'})
|
||||
importance = importance.merge(mn)
|
||||
importance['pred_vs_baseline'] = importance.pred - importance.baseline
|
||||
importance['pct_vs_baseline'] = (importance.pred / importance.baseline) - 1
|
||||
importance[(importance.feature == 'fur_/_skin')].sort_values('pred')[['value','rarity','pred','pred_lin','pred_log','std_rank','std_score']].sort_values('rarity')
|
||||
importance['collection'] = collection
|
||||
importance.sort_values('pct_vs_baseline')[['feature','value','pct_vs_baseline']]
|
||||
tmp = importance[std_pred_cols].mean().reset_index()
|
||||
tmp.columns = [ 'a', 'b' ]
|
||||
tmp = tmp.sort_values('b')
|
||||
feature_values = feature_values.append(importance[['collection','feature','value','pred','pred_vs_baseline','pct_vs_baseline','rarity']])
|
||||
|
||||
attributes['feature'] = attributes.feature.apply(lambda x: re.sub('_', ' ', x).title() )
|
||||
feature_values['feature'] = feature_values.feature.apply(lambda x: re.sub('_', ' ', x).title() )
|
||||
|
||||
pred_price = pred_price[[ 'collection', 'contract_address', 'token_id', 'hri_rank', 'rk', 'pred_price', 'pred_sd' ]]
|
||||
|
||||
|
||||
coefsdf.to_csv('./data/coefsdf.csv', index=False)
|
||||
salesdf.to_csv('./data/model_sales.csv', index=False)
|
||||
pred_price.to_csv('./data/pred_price.csv', index=False)
|
||||
attributes.to_csv('./data/attributes.csv', index=False)
|
||||
feature_values.to_csv('./data/feature_values.csv', index=False)
|
||||
|
||||
pred_price = pd.read_csv('./data/pred_price.csv')
|
||||
tokens = pd.read_csv('./data/tokens.csv')
|
||||
rem = tokens[tokens.clean_token_id>=10000].token_id.unique()
|
||||
l0 = len(pred_price)
|
||||
pred_price = pred_price[ -((pred_price.collection == 'LunaBulls') & (pred_price.token_id.isin(rem))) ]
|
||||
l1 = len(pred_price)
|
||||
pred_price.to_csv('./data/pred_price.csv', index=False)
|
||||
|
||||
# listings = pd.read_csv('./data/listings.csv')
|
||||
# listings['token_id'] = listings.token_id.astype(int)
|
||||
|
||||
# tmp = salesdf.merge(attributes[ (attributes.collection == 'thugbirdz') & (attributes.feature == 'Position In Gang') & (attributes.value == 'Underboss') ])
|
||||
# tmp = pred_price.merge(attributes[ (attributes.collection == 'thugbirdz') & (attributes.feature == 'Position In Gang') & (attributes.value == 'Underboss') ])
|
||||
# tmp['token_id'] = tmp.token_id.astype(int)
|
||||
# tmp = tmp.merge(listings[['collection','token_id','price']])
|
||||
# tmp.sort_values('pred_price', ascending=0)
|
||||
|
||||
if CHECK_EXCLUDE:
|
||||
salesdf['rat'] = salesdf.price / salesdf.pred
|
||||
salesdf['dff'] = salesdf.price - salesdf.pred
|
||||
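# flag a sale for exclusion when price and prediction diverge by both a large absolute gap and
# a large ratio (tiered thresholds), checked in both directions below.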
salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int)
|
||||
salesdf['rat'] = salesdf.pred / salesdf.price
|
||||
salesdf['dff'] = salesdf.pred - salesdf.price
|
||||
salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int)
|
||||
salesdf['exclude'] = (salesdf.exclude_1 + salesdf.exclude_2).apply(lambda x: int(x>0))
|
||||
print(salesdf.exclude_1.mean())
|
||||
print(salesdf.exclude_2.mean())
|
||||
print(salesdf.exclude.mean())
|
||||
salesdf[salesdf.token_id == '2239'][['collection','price','exclude']]
|
||||
salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False)
|
||||
|
||||
attributes[ (attributes.collection == 'thugbirdz') & (attributes.token_id == '1869') ]
|
||||
feature_values[ (feature_values.collection == 'thugbirdz') & (feature_values.feature == 'position_in_gang') ]
|
||||
sorted(feature_values[ (feature_values.collection == 'thugbirdz') ].feature.unique())
|
||||
|
||||
pred_price[pred_price.collection == 'peskypenguinclub'].head()
|
||||
79
viz/server.R
79
viz/server.R
@ -45,7 +45,7 @@ server <- function(input, output, session) {
|
||||
selectInput(
|
||||
inputId = 'collectionname'
|
||||
, label = NULL
|
||||
, selected = 'LunaBulls'
|
||||
, selected = 'Levana Dragon Eggs'
|
||||
, choices = choices
|
||||
, width = "100%"
|
||||
)
|
||||
@ -197,24 +197,27 @@ server <- function(input, output, session) {
|
||||
return(head(attributes, 0))
|
||||
}
|
||||
cur <- attributes[ token_id == eval(as.numeric(id)) & collection == eval(selected) ]
|
||||
cur <- merge( cur, feature_values[collection == eval(selected), list(feature, value, pred_vs_baseline, pct_vs_baseline) ], all.x=TRUE )
|
||||
# cur <- merge( cur, feature_values[collection == eval(selected), list(feature_name, feature_value, pred_vs_baseline, pct_vs_baseline) ], all.x=TRUE )
|
||||
cur <- cur[order(rarity)]
|
||||
floor <- getFloors()[2]
|
||||
log_coef <- coefsdf[ collection == eval(selected) ]$log_coef[1]
|
||||
lin_coef <- coefsdf[ collection == eval(selected) ]$lin_coef[1]
|
||||
s <- sum(cur$pct_vs_baseline)
|
||||
p <- getPredPrice()
|
||||
p <- as.numeric(p[ token_id == eval(as.numeric(id)) ]$pred_price)
|
||||
# p <- pred_price[ token_id == eval(as.numeric(id)) & collection == eval(selected) ]$pred_price
|
||||
ratio <- (p / floor) - 1
|
||||
ratio <- pmax(0, ratio)
|
||||
if (ratio > 0 & length(ratio) > 0) {
|
||||
mult <- ratio / s
|
||||
cur[, pct_vs_baseline := pct_vs_baseline * eval(mult) ]
|
||||
}
|
||||
cur[, vs_baseline := round((pred_vs_baseline * eval(lin_coef)) + (pct_vs_baseline * eval(floor) * eval(log_coef) ), 1) ]
|
||||
cur[, pred_vs_baseline := round(pred_vs_baseline, 1) ]
|
||||
cur[, vs_baseline := round(pred_vs_baseline + (pct_vs_baseline * eval(floor)), 1) ]
|
||||
# floor <- getFloors()[2]
|
||||
# log_coef <- coefsdf[ collection == eval(selected) ]$log_coef[1]
|
||||
# lin_coef <- coefsdf[ collection == eval(selected) ]$lin_coef[1]
|
||||
# s <- sum(cur$pct_vs_baseline)
|
||||
# p <- getPredPrice()
|
||||
# p <- as.numeric(p[ token_id == eval(as.numeric(id)) ]$pred_price)
|
||||
# # p <- pred_price[ token_id == eval(as.numeric(id)) & collection == eval(selected) ]$pred_price
|
||||
# ratio <- (p / floor) - 1
|
||||
# ratio <- pmax(0, ratio)
|
||||
# if (ratio > 0 & length(ratio) > 0) {
|
||||
# mult <- ratio / s
|
||||
# cur[, pct_vs_baseline := pct_vs_baseline * eval(mult) ]
|
||||
# }
|
||||
cur[, vs_baseline := 0 ]
|
||||
cur[, pred_vs_baseline := 0 ]
|
||||
cur[, vs_baseline := 0 ]
|
||||
# cur[, vs_baseline := round((pred_vs_baseline * eval(lin_coef)) + (pct_vs_baseline * eval(floor) * eval(log_coef) ), 1) ]
|
||||
# cur[, pred_vs_baseline := round(pred_vs_baseline, 1) ]
|
||||
# cur[, vs_baseline := round(pred_vs_baseline + (pct_vs_baseline * eval(floor)), 1) ]
|
||||
return(cur)
|
||||
})
|
||||
|
||||
@ -223,9 +226,11 @@ server <- function(input, output, session) {
|
||||
if( nrow(data) == 0 ) {
|
||||
return(NULL)
|
||||
}
|
||||
data[, rarity := paste0(format(round(rarity*100, 2), digits=4, decimal.mark="."),'%') ]
|
||||
data[, rarity := ifelse(is.na(rarity), '', paste0(format(round(rarity*100, 2), digits=4, decimal.mark="."),'%') )]
|
||||
|
||||
# reactable(data[, list( feature, value, rarity, vs_baseline, pred_vs_baseline, pct_vs_baseline )],
|
||||
data <- data[, list( feature, value, rarity, pct_vs_baseline )]
|
||||
# data <- data[, list( feature, value, rarity, pct_vs_baseline )]
|
||||
data <- data[, list( feature_name, feature_value, rarity )]
|
||||
reactable(data,
|
||||
defaultColDef = colDef(
|
||||
headerStyle = list(background = "#10151A")
|
||||
@ -234,17 +239,17 @@ server <- function(input, output, session) {
|
||||
borderless = TRUE,
|
||||
outlined = FALSE,
|
||||
columns = list(
|
||||
feature = colDef(name = "Attribute", align = "left"),
|
||||
value = colDef(name = "Value", align = "left"),
|
||||
rarity = colDef(name = "Rarity", align = "left"),
|
||||
pct_vs_baseline = colDef(
|
||||
name="Value", header=with_tooltip("Value", "The estimated price impact of this feature vs the floor")
|
||||
, html = TRUE
|
||||
, align = "left"
|
||||
, cell = function(x) {
|
||||
htmltools::tags$span(paste0('+', format(round(x*1000)/10, digits=4, decimal.mark=".", big.mark=","), '%'))
|
||||
}
|
||||
)
|
||||
feature_name = colDef(name = "Attribute", align = "left"),
|
||||
feature_value = colDef(name = "Value", align = "left"),
|
||||
rarity = colDef(name = "Rarity", align = "left")
|
||||
# pct_vs_baseline = colDef(
|
||||
# name="Value", header=with_tooltip("Value", "The estimated price impact of this feature vs the floor")
|
||||
# , html = TRUE
|
||||
# , align = "left"
|
||||
# , cell = function(x) {
|
||||
# htmltools::tags$span(paste0('+', format(round(x*1000)/10, digits=4, decimal.mark=".", big.mark=","), '%'))
|
||||
# }
|
||||
# )
|
||||
)
|
||||
)
|
||||
})
|
||||
@ -255,7 +260,7 @@ server <- function(input, output, session) {
|
||||
return(NULL)
|
||||
}
|
||||
data <- feature_values[ collection == eval(selected)]
|
||||
reactable(data[, list( feature, value, rarity, pct_vs_baseline )],
|
||||
reactable(data[, list( feature_name, feature_value, rarity, pct_vs_baseline )],
|
||||
defaultColDef = colDef(
|
||||
headerStyle = list(background = "#10151A")
|
||||
),
|
||||
@ -263,8 +268,8 @@ server <- function(input, output, session) {
|
||||
outlined = FALSE,
|
||||
searchable = TRUE,
|
||||
columns = list(
|
||||
feature = colDef(name = "Attribute", align = "left"),
|
||||
value = colDef(name = "Value", align = "left"),
|
||||
feature_name = colDef(name = "Attribute", align = "left"),
|
||||
feature_value = colDef(name = "Value", align = "left"),
|
||||
rarity = colDef(name = "Rarity", align = "left", cell = function(x) {
|
||||
htmltools::tags$span(paste0(format(x*100, digits=3, decimal.mark=".", big.mark=","),'%'))
|
||||
}),
|
||||
@ -504,7 +509,9 @@ server <- function(input, output, session) {
|
||||
df[, deal_score := round(pmin( 100, pmax(0, deal_score) )) ]
|
||||
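# deal score: percentile of the listing price under a Normal centered on the predicted price,
# flipped and scaled to 0-100, so listings priced well below the prediction score near 100.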
df[, deal_score := pnorm(price, pred_price, eval(SD_SCALE) * pred_sd * pred_price / pred_price_0), by = seq_len(nrow(df)) ]
|
||||
df[, deal_score := round(100 * (1 - deal_score)) ]
|
||||
df[, pred_price := round(pred_price) ]
|
||||
# df[, pred_price := round(pred_price) ]
|
||||
df[, pred_price := paste0(format(round(pred_price, 1), digits=3, decimal.mark=".", big.mark=",")) ]
|
||||
|
||||
df <- df[, list(token_id, price, pred_price, deal_score)]
|
||||
df <- df[order(-deal_score)]
|
||||
return(df)
|
||||
@ -517,7 +524,7 @@ server <- function(input, output, session) {
|
||||
if( nrow(df) == 0 ) {
|
||||
return(NULL)
|
||||
}
|
||||
df <- df[ deal_score >= 10 ]
|
||||
df <- df[ deal_score >= 0 ]
|
||||
df[, hover_text := paste0('<b>#',token_id,'</b><br>Listing Price: ',price,'<br>Fair Market Price: ',pred_price,'<br>Deal Score: ',deal_score) ]
|
||||
|
||||
fig <- plot_ly(
|
||||
|
||||
34
viz/ui.R
34
viz/ui.R
@ -102,17 +102,33 @@ fluidPage(
|
||||
, fluidRow(
|
||||
class="grey8row"
|
||||
, h2("Listings", icon(class="padding-left-10", id="listings-tooltip", "info-circle"))
|
||||
, bsTooltip(id = "listings-tooltip", title = "Plot only shows listings with deal score > 10; Click a dot to select the token", placement = "bottom", trigger = "hover")
|
||||
, bsTooltip(id = "listings-tooltip", title = "Plot only shows listings with deal score > 5; Click a dot to select the token", placement = "bottom", trigger = "hover")
|
||||
, div(
|
||||
class = "listing-plot"
|
||||
, plotlyOutput("listingplot", height = 500)
|
||||
, div(class='description', 'Plot only shows listings with deal score > 10')
|
||||
, div(class='description', 'Plot only shows listings with deal score > 5')
|
||||
, div(class='description', 'Click a dot to select the token')
|
||||
)
|
||||
, div(class = "table", reactableOutput("listingtable"))
|
||||
, div(class = "description", 'This app is still in beta - listing updates will be periodic (but at least 3x a week)')
|
||||
, div(class = "link", uiOutput('listingurl'))
|
||||
)
|
||||
, fluidRow(
|
||||
class="grey8row faq"
|
||||
, h2("FAQ")
|
||||
, h4("What is NFT Deal Score?")
|
||||
, div("We use historical sales data to determine the values and the rankings of each NFT.")
|
||||
, h4("Why is this rank different?")
|
||||
, div("Although rarity is a feature in our model, it is not just a rarity-based ranking. Certain features are put at a higher premium on the secondary marketplace, and this ranking reflects that.")
|
||||
, h4("Why are the rarity %s different?")
|
||||
, div("Our %s reflect only the NFTs in existence. Other tools may include more theoretical numbers.")
|
||||
, h4("How does the model work?")
|
||||
, div("Each attribute is an input into the model. We are working to add better model explanations to the tool.")
|
||||
, h4("How often is the data updated?")
|
||||
, div("Listings are updated 3x / week. Model is updated weekly.")
|
||||
, h4("Where can I send my questions?")
|
||||
, div(a(class="", href="https://twitter.com/nftdealscore", "@nftdealscore"), " on Twitter")
|
||||
)
|
||||
, fluidRow(
|
||||
class="grey8row"
|
||||
, h2("NFT Rankings", icon(class="padding-left-10", id="nft-rankings-tooltip", "info-circle"))
|
||||
@ -127,11 +143,11 @@ fluidPage(
|
||||
, div(class = "table", reactableOutput("salestable"))
|
||||
, div(class = "description", 'This app is still in beta - sales data may be incomplete or delayed')
|
||||
)
|
||||
, fluidRow(
|
||||
class="grey8row"
|
||||
, h2("Feature Summary", icon(class="padding-left-10", id="feature-summary-tooltip", "info-circle"))
|
||||
, bsTooltip(id = "feature-summary-tooltip", title = "Shows the rarity and estimated price impact of each feature", placement = "bottom", trigger = "hover")
|
||||
, div(class = "table", reactableOutput("featurestable"))
|
||||
, div(class = "description", 'Shows the rarity and estimated price impact of each feature')
|
||||
)
|
||||
# , fluidRow(
|
||||
# class="grey8row"
|
||||
# , h2("Feature Summary", icon(class="padding-left-10", id="feature-summary-tooltip", "info-circle"))
|
||||
# , bsTooltip(id = "feature-summary-tooltip", title = "Shows the rarity and estimated price impact of each feature", placement = "bottom", trigger = "hover")
|
||||
# , div(class = "table", reactableOutput("featurestable"))
|
||||
# , div(class = "description", 'Shows the rarity and estimated price impact of each feature')
|
||||
# )
|
||||
)
|
||||
|
||||
@ -264,6 +264,15 @@ tr {
|
||||
}
|
||||
|
||||
|
||||
/****************/
|
||||
/* FAQ */
|
||||
/****************/
|
||||
.faq > h4 {
|
||||
font-size: 22px;
|
||||
padding-top: 32px;
|
||||
}
|
||||
|
||||
|
||||
/*******************/
|
||||
/* General */
|
||||
/*******************/
|
||||
|
||||