From 9dd1d71538210179acaf5aed64ef4abe3c4bbf16 Mon Sep 17 00:00:00 2001
From: flipside-kellen
Date: Thu, 23 Dec 2021 12:00:31 -0800
Subject: [PATCH] levana

---
 load_data.py                        |  36 +-
 metadata/sql/Levana Dragon Eggs.txt |  10 +-
 metadata/sql/Levana Dust.txt        |  12 +-
 metadata/sql/Levana Meteors.txt     |   2 +-
 scrape_sol_nfts.py                  |  30 +-
 solana_model.py                     | 528 +++++++++----------
 solana_model_old.py                 | 574 ++++++++++++++++++++++++++++
 viz/server.R                        |  79 ++--
 viz/ui.R                            |  34 +-
 viz/www/styles.css                  |   9 +
 10 files changed, 887 insertions(+), 427 deletions(-)
 create mode 100644 solana_model_old.py

diff --git a/load_data.py b/load_data.py
index 520bebd2..4032fe37 100644
--- a/load_data.py
+++ b/load_data.py
@@ -55,8 +55,10 @@ def run_queries():
         metadata = ctx.cursor().execute(' '.join(query))
         metadata = pd.DataFrame.from_records(iter(metadata), columns=[x[0] for x in metadata.description])
         metadata = clean_colnames(metadata)
+        metadata['image'] = metadata.image.apply(lambda x: 'https://cloudflare-ipfs.com/ipfs/'+re.split('/', x)[-1] )
         metadata['collection'] = c
         metadata['chain'] = 'Terra'
+        list(metadata.image.values[:2]) + list(metadata.image.values[-2:])
         metadata.to_csv('./data/metadata/{}.csv'.format(c), index=False)
         # old = pd.read_csv('./data/metadata.csv')
         # old = old[-old.collection.isin(metadata.collection.unique())]
@@ -75,7 +77,7 @@ def add_terra_tokens():
         , msg_value:execute_msg:mint_nft:extension:name AS name
         , msg_value:execute_msg:mint_nft:extension:image AS image
         FROM terra.msgs
-        WHERE msg_value:contract::string = 'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2'
+        WHERE msg_value:contract::string = 'terra16wuzgsx3tz4hkqu73q5s7unxenefkkvefvewsh'
         AND tx_status = 'SUCCEEDED'
         AND msg_value:execute_msg:mint_nft is not null
     '''
@@ -164,6 +166,9 @@ def add_terra_metadata():
         metadata['attribute_count'] = 0
         l = len(metadata)
         incl_att_count = not collection in [ 'Levana Dragon Eggs' ]
+        metadata.groupby('cracking_date').token_id.count()
+        metadata.groupby('weight').token_id.count()
+        metadata[metadata.cracking_date=='2471-12-22'][['token_id']]
         for c in list(metadata.columns) + ['attribute_count']:
             if c in ['token_id','collection','pct','levana_rank','meteor_id']:
                 continue
@@ -186,11 +191,18 @@ def add_terra_metadata():
         # metadata.sort_values('pct_rank')
         metadata.sort_values('pct')
         metadata['rank'] = metadata.pct.rank()
-        metadata['score'] = metadata.pct.apply(lambda x: 1.0 / x )
-        mn = metadata.score.min()
-        metadata['score'] = metadata.score.apply(lambda x: x / mn )
-        metadata.score.max()
-        metadata.sort_values('rank')[['rank','pct','score']]
+        metadata['rarity_score'] = metadata.pct.apply(lambda x: 1.0 / (x**0.2) )
+        mn = metadata.rarity_score.min()
+        mx = metadata.rarity_score.max()
+        metadata['rarity_score'] = metadata.rarity_score.apply(lambda x: round(((x - mn) * 999 / (mx - mn)) + 1) )
+        metadata.sort_values('rarity_score', ascending=0).head(20)[['token_id','collection_rank','rarity_score']]
+        metadata.sort_values('rarity_score', ascending=0).tail(20)[['token_id','collection_rank','rarity_score']]
+        metadata[metadata.token_id==6157].sort_values('rarity_score', ascending=0).tail(20)[['token_id','collection_rank','rarity_score','rank']]
+        metadata[metadata['rank']>=3000].groupby('weight').token_id.count()
+
+        metadata.rarity_score.max()
+        metadata.rarity_score.min()
+        metadata.sort_values('rank')[['rank','pct','rarity_score']]
 
         m = pd.DataFrame()
         for c in metadata.columns:
@@ -201,16 +213,20 @@ def add_terra_metadata():
             m = m.append(cur)
         m['chain'] = 'Terra'
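(Illustrative aside, not part of the patch.) The load_data.py hunk above replaces the old score column with a rarity_score equal to 1 / pct**0.2, min-max scaled onto the integer range 1..1000. A minimal standalone sketch of that transform, using a made-up pct column:

import pandas as pd

metadata = pd.DataFrame({'token_id': [1, 2, 3], 'pct': [0.0001, 0.01, 0.5]})
# inverse of the joint trait probability, dampened by the 0.2 exponent
metadata['rarity_score'] = metadata.pct.apply(lambda x: 1.0 / (x ** 0.2))
mn = metadata.rarity_score.min()
mx = metadata.rarity_score.max()
# min-max scale so the most common token scores 1 and the rarest scores 1000
metadata['rarity_score'] = metadata.rarity_score.apply(lambda x: round(((x - mn) * 999 / (mx - mn)) + 1))
print(metadata)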
m.groupby('feature_name').feature_value.count() - m[m.feature_name=='face'].groupby('feature_value').token_id.count() - print(len(m.token_id.unique())) + if collection == 'Levana Dragon Eggs': + add = m[m.feature_name=='collection_rank'] + add['feature_name'] = 'transformed_collection_rank' + add['feature_value'] = add.feature_value.apply(lambda x: (1.0/ (x + 0.5))**1 ) + m = m.append(add) g = m.groupby('feature_value').feature_name.count().reset_index().sort_values('feature_name').tail(50) old = pd.read_csv('./data/metadata.csv') if not 'chain' in old.columns: old['chain'] = old.collection.apply(lambda x: 'Terra' if x in [ 'Galactic Punks', 'LunaBulls' ] else 'Solana' ) old = old[-old.collection.isin(m.collection.unique())] old = old.append(m) - old = old.drop_duplicates() - print(old.groupby(['chain','collection']).token_id.count()) + old = old.drop_duplicates(subset=['collection','token_id','feature_name']) + old = old[-(old.feature_name.isin(['last_sale']))] + # print(old.groupby(['chain','collection']).token_id.count()) print(old[['chain','collection','token_id']].drop_duplicates().groupby(['chain','collection']).token_id.count()) old.to_csv('./data/metadata.csv', index=False) diff --git a/metadata/sql/Levana Dragon Eggs.txt b/metadata/sql/Levana Dragon Eggs.txt index 6f43dfef..e01a7ef0 100644 --- a/metadata/sql/Levana Dragon Eggs.txt +++ b/metadata/sql/Levana Dragon Eggs.txt @@ -4,8 +4,8 @@ WITH legendary_traits AS ( block_id, tx_id, msg_value:execute_msg:mint:extension:name::string as name, - CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image, - msg_value:execute_msg:mint:token_id::string as tokenid, + msg_value:execute_msg:mint:extension:image::string as image, + msg_value:execute_msg:mint:token_id::string as tokenid, msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity, msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank, msg_value:execute_msg:mint:extension:attributes[2]:value::string as origin, @@ -39,7 +39,7 @@ WITH legendary_traits AS ( block_id, tx_id, msg_value:execute_msg:mint:extension:name::string as name, - CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image, + msg_value:execute_msg:mint:extension:image::string as image, msg_value:execute_msg:mint:token_id::string as tokenid, msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity, msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank, @@ -74,7 +74,7 @@ WITH legendary_traits AS ( block_id, tx_id, msg_value:execute_msg:mint:extension:name::string as name, - CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image, + msg_value:execute_msg:mint:extension:image::string as image, msg_value:execute_msg:mint:token_id::string as tokenid, msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity, msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank, @@ -109,7 +109,7 @@ WITH legendary_traits AS ( block_id, tx_id, msg_value:execute_msg:mint:extension:name::string as name, - CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image, + msg_value:execute_msg:mint:extension:image::string as image, msg_value:execute_msg:mint:token_id::string as tokenid, msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity, msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank, diff --git 
a/metadata/sql/Levana Dust.txt b/metadata/sql/Levana Dust.txt index f1e78e8b..dc6fa75d 100644 --- a/metadata/sql/Levana Dust.txt +++ b/metadata/sql/Levana Dust.txt @@ -5,7 +5,7 @@ select block_timestamp, block_id, tx_id, msg_value:execute_msg:mint:extension:name::string as name, - CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image, + msg_value:execute_msg:mint:extension:image::string as image, msg_value:execute_msg:mint:token_id::string as token_id, msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity, msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank, @@ -38,7 +38,7 @@ select block_timestamp, block_id, tx_id, msg_value:execute_msg:mint:extension:name::string as name, - CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image, + msg_value:execute_msg:mint:extension:image::string as image, msg_value:execute_msg:mint:token_id::string as token_id, msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity, msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank, @@ -74,7 +74,7 @@ select block_timestamp, block_id, tx_id, msg_value:execute_msg:mint:extension:name::string as name, - CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image, + msg_value:execute_msg:mint:extension:image::string as image, msg_value:execute_msg:mint:token_id::string as token_id, msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity, msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank, @@ -109,7 +109,7 @@ select block_timestamp, block_id, tx_id, msg_value:execute_msg:mint:extension:name::string as name, - CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image, + msg_value:execute_msg:mint:extension:image::string as image, msg_value:execute_msg:mint:token_id::string as token_id, msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity, msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank, @@ -144,7 +144,7 @@ select block_timestamp, block_id, tx_id, msg_value:execute_msg:mint:extension:name::string as name, - CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image, + msg_value:execute_msg:mint:extension:image::string as image, msg_value:execute_msg:mint:token_id::string as token_id, msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity, msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank, @@ -179,7 +179,7 @@ select block_timestamp, block_id, tx_id, msg_value:execute_msg:mint:extension:name::string as name, - CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image, + msg_value:execute_msg:mint:extension:image::string as image, msg_value:execute_msg:mint:token_id::string as token_id, msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity, msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank, diff --git a/metadata/sql/Levana Meteors.txt b/metadata/sql/Levana Meteors.txt index 30c03835..9ad5f35a 100644 --- a/metadata/sql/Levana Meteors.txt +++ b/metadata/sql/Levana Meteors.txt @@ -4,7 +4,7 @@ select block_timestamp, block_id, tx_id, msg_value:execute_msg:mint:extension:name::string as name, - 
CONCAT('https://d75aawrtvbfp1.cloudfront.net/',msg_value:execute_msg:mint:extension:image::string) as image, + msg_value:execute_msg:mint:extension:image::string as image, msg_value:execute_msg:mint:token_id::string as token_id, msg_value:execute_msg:mint:extension:attributes[0]:value::string as rarity, msg_value:execute_msg:mint:extension:attributes[1]:value::string as rank, diff --git a/scrape_sol_nfts.py b/scrape_sol_nfts.py index 589cb494..8952f3b2 100644 --- a/scrape_sol_nfts.py +++ b/scrape_sol_nfts.py @@ -39,16 +39,18 @@ def clean_name(name): def scrape_randomearth(): d_address = { 'Galactic Punks': 'terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k', - 'LunaBulls': 'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2' + 'LunaBulls': 'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2', + 'Levana Dragon Eggs': 'terra1k0y373yxqne22pc9g7jvnr4qclpsxtafevtrpg', } data = [] - for collection in [ 'Galactic Punks', 'LunaBulls' ]: + # for collection in [ 'Levana Dragon Eggs' ]: + for collection in d_address.keys(): print(collection) page = 0 has_more = True while has_more: page += 1 - print('Page #{}'.format(page)) + print('Page #{} ({})'.format(page, len(data))) url = 'https://randomearth.io/api/items?collection_addr={}&sort=price.asc&page={}&on_sale=1'.format( d_address[collection], page) browser.get(url) soup = BeautifulSoup(browser.page_source) @@ -59,6 +61,7 @@ def scrape_randomearth(): for i in j['items']: data += [[ 'Terra', collection, i['token_id'], i['price'] / (10 ** 6) ]] df = pd.DataFrame(data, columns=['chain','collection','token_id','price']) + df.to_csv('~/Downloads/tmp.csv', index=False) old = pd.read_csv('./data/listings.csv') old = old[-old.collection.isin(df.collection.unique())] old = old.append(df) @@ -189,9 +192,12 @@ def convert_collection_names(): ,'boryokudragonz': 'Boryoku Dragonz' } for c in [ 'pred_price', 'attributes', 'feature_values', 'model_sales', 'listings', 'coefsdf', 'tokens' ]: - df = pd.read_csv('./data/{}.csv'.format(c)) - df['collection'] = df.collection.apply(lambda x: clean_name(x) if x in d.keys() else x ) - df.to_csv('./data/{}.csv'.format(c), index=False) + try: + df = pd.read_csv('./data/{}.csv'.format(c)) + df['collection'] = df.collection.apply(lambda x: clean_name(x) if x in d.keys() else x ) + df.to_csv('./data/{}.csv'.format(c), index=False) + except: + pass def scrape_recent_sales(): o_sales = pd.read_csv('./data/sales.csv') @@ -234,6 +240,7 @@ def scrape_listings(collections = [ 'aurory','thugbirdz','smb','degenapes','pesk , 'degenapes': 'degen-ape-academy' , 'peskypenguinclub': 'pesky-penguins' } + collection = 'smb' for collection in collections: if collection == 'boryokudragonz': continue @@ -249,7 +256,7 @@ def scrape_listings(collections = [ 'aurory','thugbirdz','smb','degenapes','pesk print('{} page #{} ({})'.format(collection, page, len(data))) sleep(3) page += 1 - for j in [25, 30, 35, 30, 25] * 2: + for j in [20, 30, 30, 30, 30, 30, 30, 30] * 1: for _ in range(1): soup = BeautifulSoup(browser.page_source) # for row in browser.find_elements_by_class_name('ag-row'): @@ -325,6 +332,7 @@ def scrape_listings(collections = [ 'aurory','thugbirdz','smb','degenapes','pesk pred_price = pd.read_csv('./data/pred_price.csv')[['collection','token_id','pred_price','pred_sd']] pred_price['collection'] = pred_price.collection.apply(lambda x: clean_name(x)) + pred_price['token_id'] = pred_price.token_id.astype(str) pred_price = pred_price.merge(listings) coefsdf = pd.read_csv('./data/coefsdf.csv') @@ -338,7 +346,10 @@ def scrape_listings(collections = [ 
'aurory','thugbirdz','smb','degenapes','pesk metadata = pd.read_csv('./data/metadata.csv') solana_blob = metadata[ (metadata.collection == 'aurory') & (metadata.feature_name == 'skin') & (metadata.feature_value == 'Solana Blob (9.72%)')].token_id.unique() - pred_price['pred_price'] = pred_price.apply(lambda x: (x['pred_price'] * 0.8) - 8 if x['token_id'] in solana_blob and x['collection'] == 'Aurory' else x['pred_price'], 1 ) + pred_price['pred_price'] = pred_price.apply(lambda x: (x['pred_price'] * 0.8) - 4 if x['token_id'] in solana_blob and x['collection'] == 'Aurory' else x['pred_price'], 1 ) + + solana_blob = metadata[ (metadata.collection == 'aurory') & (metadata.feature_name == 'hair') & (metadata.feature_value == 'Long Blob Hair (9.72%)')].token_id.unique() + pred_price['pred_price'] = pred_price.apply(lambda x: (x['pred_price'] * 0.8) - 2 if x['token_id'] in solana_blob and x['collection'] == 'Aurory' else x['pred_price'], 1 ) pred_price['abs_chg'] = (pred_price.floor - pred_price.floor_price) * pred_price.lin_coef pred_price['pct_chg'] = (pred_price.floor - pred_price.floor_price) * pred_price.log_coef @@ -711,6 +722,7 @@ def scratch(): # print('Sleeping until {}'.format(sleep_to)) # sleep(60 * 15) alerted = [] +scrape_randomearth() alerted = scrape_listings(alerted = alerted) -# scrape_randomearth() +# alerted = scrape_listings(['smb'],alerted = alerted) convert_collection_names() \ No newline at end of file diff --git a/solana_model.py b/solana_model.py index 9cf843cd..20bd560e 100644 --- a/solana_model.py +++ b/solana_model.py @@ -10,7 +10,7 @@ import tensorflow as tf import snowflake.connector from datetime import datetime from sklearn.ensemble import RandomForestRegressor -from sklearn.linear_model import LinearRegression, RidgeCV +from sklearn.linear_model import LinearRegression, RidgeCV, Lasso from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV warnings.filterwarnings('ignore') @@ -18,26 +18,45 @@ warnings.filterwarnings('ignore') os.chdir('/Users/kellenblumberg/git/nft-deal-score') CHECK_EXCLUDE = False -CHECK_EXCLUDE = True +# CHECK_EXCLUDE = True # Using sales from howrare.is - the last sale that was under 300 was when the floor was at 72. 
Filtering for when the floor is >100, the lowest sale was 400 ################################### # Define Helper Functions # ################################### -def standardize_df(df, cols, usedf=None): +def standardize_df(df, cols, usedf=None, verbose=False): for c in cols: if type(usedf) != type(pd.DataFrame()): usedf = df mu = usedf[c].mean() sd = usedf[c].std() - # print(c) + if verbose: + print(c) if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0: df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 ) else: df['std_{}'.format(c)] = (df[c] - mu) / sd return(df) +def merge(left, right, on=None, how='inner', ensure=True, verbose=True): + df = left.merge(right, on=on, how=how) + if len(df) != len(left) and (ensure or verbose): + print('{} -> {}'.format(len(left), len(df))) + cur = left.merge(right, on=on, how='left') + cols = set(right.columns).difference(set(left.columns)) + print(cols) + col = list(cols)[0] + missing = cur[cur[col].isnull()] + print(missing.head()) + if ensure: + assert(False) + return(df) + +def just_float(x): + x = re.sub('[^\d\.]', '', str(x)) + return(float(x)) + def calculate_percentages(df, cols=[]): add_pct = not 'pct' in df.columns if not len(cols): @@ -64,13 +83,9 @@ exclude = [ # ( 'aurory', 3323, 138 ) ] s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'}) -s_df[ s_df.collection == 'Levana Dragons' ].sort_values('block_timestamp', ascending=0).head() -print(len(s_df[s_df.collection == 'Levana Dragon Eggs'])) -print(s_df.groupby('collection').token_id.count()) -s_df.collection.unique() s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])] -s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]] s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ] +s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]] for e in exclude: s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )] s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ] @@ -82,241 +97,183 @@ if not CHECK_EXCLUDE: s_df = s_df[s_df.exclude.isnull()] del s_df['exclude'] +######################### +# Load Metadata # +######################### m_df = pd.read_csv('./data/metadata.csv') m_df['token_id'] = m_df.token_id.astype(str) -tmp = m_df[m_df.collection.isin(['Levana Dragon Eggs','Levana Meteors','Levana Dust'])] -tmp['tmp'] = tmp.token_id.astype(int) -tmp.groupby('collection').tmp.max() -m_df.head() -# s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(x[:10], '%Y-%m-%d %H:%M:%S') ) +# remove ones that are not actually metadata +m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ] +m_df['feature_value'] = m_df.feature_value.apply(lambda x: re.split("\(", re.sub("\"", "", x))[0] if type(x)==str else x ) +m_df[(m_df.feature_name=='rank') & (m_df.collection == 'Levana Dragon Eggs')] + + +##################################### +# Exclude Special LunaBulls # +##################################### +tokens = pd.read_csv('./data/tokens.csv') +tokens.token_id.unique() +lunabullsrem = tokens[tokens.clean_token_id>=10000].token_id.unique() +m_df = m_df[ -((m_df.collection == 'LunaBulls') & (m_df.token_id.isin(lunabullsrem))) ] +s_df = s_df[ -((s_df.collection == 'LunaBulls') & (s_df.token_id.isin(lunabullsrem))) ] + + +########################### +# Calculate Floor # +########################### s_df['block_timestamp'] = 
s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') ) s_df['timestamp'] = s_df.block_timestamp.astype(int) -# del metadata['price'] -# del metadata['last_sale'] -s_df = s_df.sort_values(['collection','block_timestamp']) -s_df['mn_20'] = s_df.groupby('collection').price.shift(1) -s_df = s_df.sort_values(['collection','block_timestamp']) s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) -s_df[[ 'block_timestamp','days_ago' ]].drop_duplicates(subset=['days_ago']) -s_df['av_20'] = s_df.groupby('collection')['mn_20'].rolling(20).mean().reset_index(0,drop=True) -s_df = s_df.sort_values(['collection','block_timestamp']) -# s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).median().reset_index(0,drop=True) -s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) -# s_df[ (-((s_df.price) >= (s_df.md_20 * 0.2))) & (s_df.price.notnull()) & (s_df.collection == 'Levana Dragon Eggs') ] - -s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.75) ] +# lowest price in last 20 sales +s_df = s_df.sort_values(['collection','block_timestamp']) +s_df['mn_20'] = s_df.groupby('collection').price.shift(1) +s_df = s_df.sort_values(['collection','block_timestamp']) +s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) + +# exclude sales that are far below the existing floor +s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ] + +# 10%ile of last 20 sales s_df = s_df.sort_values(['collection','block_timestamp']) s_df['mn_20'] = s_df.groupby('collection').price.shift(1) s_df = s_df.sort_values(['collection','block_timestamp']) -# s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).min().reset_index(0,drop=True) s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True) -s_df.sort_values(['collection','block_timestamp'])[['price','mn_20','block_timestamp']].head(21).tail(40) -s_df.sort_values(['collection','block_timestamp'])[['price','mn_20','block_timestamp']].head(20).sort_values('price') -s_df['tmp'] = s_df.mn_20 / s_df.md_20 -tmp = s_df[s_df.collection=='smb'][['mn_20','block_timestamp']] -tmp['date'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] ) -tmp = tmp.groupby('date').mn_20.median().reset_index() -tmp.to_csv('~/Downloads/tmp.csv', index=False) -s_df['tmp'] = s_df.price / s_df.mn_20 -s_df[s_df.collection == 'smb'].sort_values('block_timestamp')[['token_id','price','mn_20']] -s_df[s_df.collection == 'smb'].sort_values('tmp').head(20)[['collection','token_id','price','mn_20','tmp']] -s_df.groupby('collection').tmp.median() -s_df.groupby('collection').tmp.mean() - -s_df.sort_values('tmp').head() -s_df['tmp'] = s_df.price / s_df.mn_20 -s_df[['collection','token_id','block_timestamp','price','mn_20','md_20','av_20','tmp']].to_csv('~/Downloads/tmp.csv', index=False) -s_df.groupby('collection').tmp.median() -s_df.groupby('collection').tmp.mean() -s_df.sort_values('tmp', ascending=0).head() -s_df.head(21) -m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ] -# m_df['feature_value'] = m_df.feature_value.apply(lambda x: x.strip() ) -# m_df.feature_value.unique() -pred_cols = {} -metadata = {} -sales = {} -collection_features = {} -m_df[(m_df.collection == 'Galactic Punks') & (m_df.feature_name == 'pct')].sort_values('token_id') -c = 'Galactic Punks' -EXCLUDE_COLS = { - 'Levana 
Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature'] -} -for c in s_df.collection.unique(): - print('Building {} model'.format(c)) - sales[c] = s_df[ s_df.collection == c ] - exclude = EXCLUDE_COLS[c] if c in EXCLUDE_COLS.keys() else [] - pred_cols[c] = sorted([x for x in m_df[ m_df.collection == c ].feature_name.unique() if not x in exclude]) - collection_features[c] = [ c for c in pred_cols[c] if not c in ['score','rank','pct']+exclude ] - metadata[c] = m_df[ (m_df.collection == c) & (-(m_df.feature_name.isin(exclude))) ] - - # tmp = pd.pivot_table( metadata[c], ['collection','token_id'], columns=['feature_name'], values=['feature_value'] ) - metadata[c] = metadata[c].pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index() - metadata[c].columns = [ 'collection','token_id' ] + pred_cols[c] - - features = collection_features[c] - cur = metadata[c] - cur = cur.dropna(subset=features) - for f in features: - if type(cur[f].values[0] == str): - cur[f] = cur[f].apply(lambda x: re.sub("\"", "", str(x) ) ) - cur[f] = cur[f].apply(lambda x: re.split("\(", x )[0].strip()) - cur = cur.replace('', 'Default') - # if not 'pct' in cur.columns: - cur = calculate_percentages( cur, features ) - dummies = pd.get_dummies(cur[features]) - feature_cols = dummies.columns - cur = pd.concat([ cur.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1) - metadata[c] = cur - # pred_cols[c] = ['rank','score','timestamp','mn_20','log_mn_20'] + list(dummies.columns) - cols = [ 'collection_rank' ] - cols = [ ] - pred_cols[c] = [ 'rank','transform_rank','score'] + [x for x in cols if x in m_df.feature_name.unique()] + list(dummies.columns) - -# collection_features = { -# 'Hashmasks': [ 'character','eyecolor','item','mask','skincolor' ] -# , 'Galactic Punks': [ 'backgrounds','hair','species','suits','jewelry','headware','glasses' ] -# , 'Solana Monkey Business': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ] -# , 'Aurory': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ] -# # , 'Thugbirdz': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ] -# } - -excludedf = pd.DataFrame() +########################### +# Calculate Floor # +########################### coefsdf = pd.DataFrame() salesdf = pd.DataFrame() attributes = pd.DataFrame() pred_price = pd.DataFrame() feature_values = pd.DataFrame() -collections = sorted(metadata.keys()) -collection = 'Galactic Punks' -tokens = pd.read_csv('./data/tokens.csv') -collection = 'Levana Dragon Eggs' -# for collection in s_df.collection.unique(): -for collection in ['Levana Dragon Eggs']: - # collection = 'LunaBulls' - # collection = 'smb' - # collection = 'aurory' - # collection = 'meerkatmillionaires' +# non-binary in model: collection_rank, temperature, weight +# non-binary in model; exclude from rarity: pct, rank, score +# exclude from model: lucky_number, shower +# exclude from model and rarity %: meteor_id, attribute_count, cracking_date +ALL_NUMERIC_COLS = ['rank','score','pct'] +MODEL_EXCLUDE_COLS = { + # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature'] + 'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count'] +} +RARITY_EXCLUDE_COLS = { + # 'Levana Dragon Eggs': 
['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature'] + 'Levana Dragon Eggs': ['meteor_id','attribute_count','collection_rank','transformed_collection_rank','rarity_score'] +} +NUMERIC_COLS = { + 'Levana Dragon Eggs': ['collection_rank','weight','temperature','transformed_collection_rank','rarity_score'] +} +ATT_EXCLUDE_COLS = { + 'Levana Dragon Eggs': ['attribute_count','transformed_collection_rank'] +} +# for collection in [ 'Levana Dragon Eggs' ]: +for collection in s_df.collection.unique(): print('Working on collection {}'.format(collection)) - p_metadata = metadata[collection] - if 'attribute_count' in p_metadata.columns: - p_metadata['attribute_count'] = p_metadata.attribute_count.astype(float).astype(int) - - p_sales = sales[collection] - # specify the predictive features - p_pred_cols = pred_cols[collection] - p_features = collection_features[collection] - p_sales['token_id'] = p_sales.token_id.apply(lambda x: re.sub("\"", "", str(x)) ) - p_metadata['token_id'] = p_metadata.token_id.apply(lambda x: re.sub("\"", "", str(x)) ) - for c in [ 'rank','score' ]: - p_metadata[c] = p_metadata[c].astype(float) - # p_sales['contract_address'] = p_sales.token_id.apply(lambda x: re.sub("\"", "", str(x)) ) - # p_metadata['contract_address'] = p_metadata.token_id.apply(lambda x: re.sub("\"", "", str(x)) ) - p_sales['contract_address'] = '' - p_metadata['contract_address'] = '' + sales = s_df[ s_df.collection == collection ] + metadata = m_df[ m_df.collection == collection ] + metadata[metadata.token_id == '1'] + metadata[metadata.feature_name == 'rank'] + metadata.feature_name.unique() - # remove 1 columns for each group (since they are colinear) - # exclude = [] - # for f in p_features: - # e = [ c for c in p_pred_cols if c[:len(f)] == f ][-1] - # exclude.append(e) + # categorize columns + all_names = sorted(metadata.feature_name.unique()) + model_exclude = MODEL_EXCLUDE_COLS[collection] if collection in MODEL_EXCLUDE_COLS.keys() else [] + num_features = sorted((NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []) + ALL_NUMERIC_COLS) + num_features = [ x for x in num_features if x in metadata.feature_name.unique() ] + num_metadata = metadata[metadata.feature_name.isin(num_features)] + num_metadata[num_metadata.feature_name == 'rank'] + cat_features = sorted([ x for x in all_names if not x in (model_exclude + num_features) ]) + cat_metadata = metadata[metadata.feature_name.isin(cat_features)] - df = p_sales.merge(p_metadata, on=['token_id','contract_address']) - df = df[df.mn_20.notnull()] + # create dummies for binary variables + num_metadata = num_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index() + num_metadata.columns = [ 'collection','token_id' ] + num_features + + # create dummies for binary variables + cat_metadata = cat_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index() + cat_metadata.columns = [ 'collection','token_id' ] + cat_features + cat_metadata = calculate_percentages( cat_metadata, cat_features ) + dummies = pd.get_dummies(cat_metadata[cat_features]) + cat_metadata = pd.concat([ cat_metadata.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1) + del cat_metadata['pct'] + + pred_cols = num_features + list(dummies.columns) + + # create training df + df = merge(sales, num_metadata, ['collection','token_id'], ensure=False) + df = merge(df, cat_metadata, ['collection','token_id']) + for c in num_features: + 
df[c] = df[c].apply(lambda x: just_float(x)) + + # create target cols target_col = 'adj_price' df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 ) - # df['mn_20'] = df.apply(lambda x: min(x[target_col], x['mn_20']), 1 ) - # tmp = df[['block_timestamp','mn_20']].copy() - # tmp['tmp'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] ) - # tmp = tmp.groupby('tmp').mn_20.median().reset_index() - # tmp.sort_values('tmp').to_csv('~/Downloads/tmp.csv', index=False) - # df['timestamp'] = df.block_timestamp.astype(int) df = df[df[target_col].notnull()] - df = df.reset_index(drop=True) - df['transform_rank'] = df['rank'].apply(lambda x: 1.0 / (x**2) ) + df['log_price'] = df[target_col].apply(lambda x: np.log(x) ) df['rel_price_0'] = df[target_col] - df.mn_20 df['rel_price_1'] = df[target_col] / df.mn_20 df = df[df.mn_20 > 0] df['log_mn_20'] = np.log(df.mn_20) print('Training on {} sales'.format(len(df))) - # df['price_median'] = df.groupby('token_id').price.median() + df = standardize_df(df, pred_cols) - # standardize columns to mean 0 sd 1 - len(p_pred_cols) - df = standardize_df(df, p_pred_cols) - std_pred_cols_0 = [ 'std_{}'.format(c) for c in p_pred_cols ] - # p_pred_cols = [ c for c in p_pred_cols if not c in exclude ] - std_pred_cols = [ 'std_{}'.format(c) for c in p_pred_cols ] - df['log_price'] = df[target_col].apply(lambda x: np.log(x) ) - # df.sort_values('block_timestamp').head(10)[['price','tx_id']] - # df.sort_values('block_timestamp').head(10)[['price','tx_id']].tx_id.values - # df = df[df.price >= 1] + std_pred_cols_0 = [ 'std_{}'.format(c) for c in pred_cols ] + std_pred_cols = [ 'std_{}'.format(c) for c in pred_cols ] ######################### # Run the Model # ######################### - len(df) - len(df.dropna(subset=std_pred_cols)) tmp = df[std_pred_cols].count().reset_index() tmp.columns = ['a','b'] tmp.sort_values('b').head(20) rem = list(tmp[tmp.b==0].a.values) std_pred_cols = [ c for c in std_pred_cols if not c in rem ] + # if collection == 'Levana Dragon Eggs': + # std_pred_cols = [ 'std_genus_Titan','std_score','std_weight','std_transformed_collection_rank','std_collection_rank','std_legendary_composition_None','std_ancient_composition_None' ] mn = df.timestamp.min() mx = df.timestamp.max() - df['weight'] = df.timestamp.apply(lambda x: 2.5 ** ((x - mn) / (mx - mn)) ) + df['wt'] = df.timestamp.apply(lambda x: 2.5 ** ((x - mn) / (mx - mn)) ) X = df[std_pred_cols].values - mu = df.log_price.mean() - sd = df.log_price.std() - df['std_log_price'] = (df.log_price - mu) / sd - # y = df.std_log_price.values - # y = df[target_col].values - # y = df.rel_price_1.values y_0 = df.rel_price_0.values y_1 = df.rel_price_1.values - # y_log = df.log_price.values - clf_lin = RidgeCV(alphas=[1.5**x for x in range(20)]) - clf_lin.fit(X, y_0, df.weight.values) + + # run the linear model + clf_lin = Lasso() if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)]) + # clf_lin = RidgeCV(alphas=[1.5**x for x in range(20)]) + clf_lin.fit(X, y_0, df.wt.values) + if collection == 'Levana Dragon Eggs': + coefs = [] + for a, b in zip(std_pred_cols, clf_lin.coef_): + coefs += [[a,b]] + coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0) + coefs.to_csv('~/Downloads/levana_lin_coefs.csv', index=False) df['pred_lin'] = clf_lin.predict(X) df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20 df['err_lin'] = abs(((df.pred_lin - df[target_col]) / df[target_col]) ) - # df['err_lin'] = abs(df.pred_lin - 
df.price ) - # df[[ 'price','pred_lin','err_lin','mn_20' ]].sort_values('err_lin').tail(50) - df.head() - clf_log = RidgeCV(alphas=[1.5**x for x in range(20)]) - clf_log.fit(X, y_1, df.weight.values) + + # run the log model + clf_log = Lasso() if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)]) + # clf_log = RidgeCV(alphas=[1.5**x for x in range(20)]) + clf_log.fit(X, y_1, df.wt.values) + if collection == 'Levana Dragon Eggs': + coefs = [] + for a, b in zip(std_pred_cols, clf_lin.coef_): + coefs += [[a,b]] + coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0) + coefs.to_csv('~/Downloads/levana_log_coefs.csv', index=False) df['pred_log'] = clf_log.predict(X) df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20 df['err_log'] = abs(((df.pred_log - df[target_col]) / df[target_col]) ) df[[ target_col,'pred_log','err_log','mn_20' ]].sort_values('err_log').tail(50) - df['err'] = df.err_lin * df.err_log - df[[ target_col,'pred_log','err_log','err_lin','err','mn_20' ]].sort_values('err').tail(50) - df['collection'] = collection - excludedf = excludedf.append(df[df.err > 2][['collection','token_id','price']]) - # df = df[df.err < 2] - print(round(len(df[df.err > 2]) * 100.0 / len(df), 2)) - - df[(df.err_log > 1) & (df.err_lin >= 5)] - - clf_log = RidgeCV(alphas=[1.5**x for x in range(20)]) - clf_log.fit(X, y_1, df.weight.values) - - clf_log = RidgeCV(alphas=[1.5**x for x in range(20)]) - clf_log.fit(X, y_1, df.weight.values) - df['pred_lin'] = clf_lin.predict(X) - df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20 - # df['pred_log'] = np.exp(clf_log.predict(X)) - df['pred_log'] = clf_log.predict(X) - df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20 + # combine the models clf = LinearRegression(fit_intercept=False) - clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.weight.values ) + clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values ) print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) )) l = df.sort_values('block_timestamp', ascending=0).mn_20.values[0] tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], l]], columns=['collection','lin_coef','log_coef','floor_price']) @@ -335,34 +292,8 @@ for collection in ['Levana Dragon Eggs']: df['pred'] = clf.predict( df[['pred_lin','pred_log']].values ) coefsdf = coefsdf.append(tmp) df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) ) - df[df.block_timestamp>='2021-10-01'].sort_values('err', ascending=0).head(10)[[ 'pred',target_col,'token_id','block_timestamp','err','mn_20' ]] - # df[df.block_timestamp>='2021-10-01'].err.mean() - df.merge(tokens[['collection','token_id','clean_token_id']]).sort_values('err', ascending=0).head(10)[[ 'pred',target_col,'clean_token_id','rank','block_timestamp','err','mn_20','tx_id' ]] - df.sort_values('price', ascending=0).head(20)[[ 'price','pred',target_col,'token_id','block_timestamp','err','mn_20','tx_id' ]] - df.sort_values('price', ascending=0).tail(40)[[ 'price','pred',target_col,'token_id','block_timestamp','err','mn_20','tx_id' ]] - df.sort_values('price', ascending=0).head(20).tx_id.values - # print(np.mean(y)) - # print(np.mean(clf.predict(X))) - - # # run neural net - # model = tf.keras.models.Sequential([ - # tf.keras.layers.Dense(9, activation='relu') - # , tf.keras.layers.Dropout(.2) - # , tf.keras.layers.Dense(3, activation='relu') - # , tf.keras.layers.Dropout(.2) - # , 
tf.keras.layers.Dense(1, activation='linear') - # ]) - # model.compile(loss='mae', optimizer=tf.keras.optimizers.SGD(learning_rate=0.0025)) - # model.fit(X, y, epochs=500, validation_split=0.3) - - # df['pred'] = np.exp( (sd * model.predict(df[std_pred_cols].values)) + mu) - # df['pred'] = model.predict(df[std_pred_cols].values) - # ratio = df.price.mean() / df.pred.mean() - # print("Manually increasing predictions by {}%".format(round((ratio-1) * 100, 1))) - - # checking errors - # df['pred'] = df.pred * ratio + # print out some summary stats df['err'] = df[target_col] - df.pred df['q'] = df.pred.rank() * 10 / len(df) df['q'] = df.q.apply(lambda x: int(round(x)) ) @@ -373,137 +304,56 @@ for collection in ['Levana Dragon Eggs']: df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) ) df['pred_sd'] = df.pred * pe_sd print(df.groupby('q')[['err','pred',target_col]].mean()) - print(df[df.weight >= df.weight.median()].groupby('q')[['err','pred',target_col]].mean()) + print(df[df.wt >= df.wt.median()].groupby('q')[['err','pred',target_col]].mean()) # df.err.mean() # df[df.weight >= 3.5].err.mean() df['collection'] = collection print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2))) - salesdf = salesdf.append( df[[ 'collection','contract_address','token_id','block_timestamp','price','pred','mn_20','rank','score' ]].sort_values('block_timestamp', ascending=0) ) + salesdf = salesdf.append( df[[ 'collection','token_id','block_timestamp','price','pred','mn_20','rank' ]].sort_values('block_timestamp', ascending=0) ) - # create the attributes dataframe - for f in p_features: - cur = p_metadata[[ 'token_id', f, '{}_pct'.format(f) ]] - cur.columns = [ 'token_id', 'value','rarity' ] - cur['feature'] = f - cur['collection'] = collection - attributes = attributes.append(cur) - # create predictions for each NFT in the collection - test = p_metadata.copy() + ############################################################ + # Create Predictions for Each NFT in The Collection # + ############################################################ + test = merge(num_metadata, cat_metadata, ['collection','token_id']) + for c in num_features: + test[c] = test[c].apply(lambda x: just_float(x) ) tail = df.sort_values('timestamp').tail(1) for c in [ 'std_timestamp','mn_20','log_mn_20' ]: if c in tail.columns: test[c] = tail[c].values[0] - test = standardize_df(test, [c for c in p_pred_cols if not c in ['timestamp'] ], df) - # test['pred_lin'] = clf_lin.predict( test[std_pred_cols].values ) - # test['pred_log'] = np.exp(clf_log.predict( test[std_pred_cols].values )) + test = standardize_df(test, pred_cols, df) test['pred_lin'] = clf_lin.predict(test[std_pred_cols].values) test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l) - # test['pred_lin'] = df.pred_lin + df.mn_20 - # df['pred_log'] = np.exp(clf_log.predict(X)) test['pred_log'] = clf_log.predict(test[std_pred_cols].values) test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l - test['pred'] = clf.predict( test[[ 'pred_lin','pred_log' ]].values ) - # test['pred'] = np.exp( (sd * model.predict(test[std_pred_cols].values)) + mu) * ratio - test['pred_price'] = test.pred#.apply(lambda x: x*(1+pe_mu) ) + test['pred_price'] = clf.predict( test[[ 'pred_lin','pred_log' ]].values ) if not CHECK_EXCLUDE: - test['pred_price'] = test.pred.apply(lambda x: (x*0.985) ) - test['pred_sd'] = test.pred * pe_sd - test['rk'] = test.pred.rank(ascending=0, method='first') + test['pred_price'] = test.pred_price.apply(lambda x: 
(x*0.985) ) + test['pred_sd'] = test.pred_price * pe_sd + test['rk'] = test.pred_price.rank(ascending=0, method='first') test['collection'] = collection - pred_price = pred_price.append( test[[ 'collection', 'contract_address','token_id','rank','rk','pred_price','pred_sd' ] + p_features].rename(columns={'rank':'hri_rank'}).sort_values('pred_price') ) - # print(test[[ 'contract_address','token_id','pred_price','pred_sd' ]].sort_values('pred_price')) + pred_price = pred_price.append( test[[ 'collection','token_id','rank','rk','pred_price','pred_sd' ]].sort_values('pred_price') ) + cols = metadata.feature_name.unique() + cols = [ x for x in cols if not x in (ATT_EXCLUDE_COLS[collection] if collection in ATT_EXCLUDE_COLS.keys() else []) + ALL_NUMERIC_COLS ] + exclude = RARITY_EXCLUDE_COLS[collection] if collection in RARITY_EXCLUDE_COLS.keys() else [] + for c in cols: + cur = metadata[metadata.feature_name == c][['collection','token_id','feature_name','feature_value']] + if c in exclude: + cur['rarity'] = None + else: + g = cur.groupby('feature_value').token_id.count().reset_index() + g['rarity'] = g.token_id / len(cur.token_id.unique()) + cur = merge(cur, g[['feature_value','rarity']]) + attributes = attributes.append(cur) - ############################## - # Feature Importance # - ############################## - coefs = [] - for a, b, c in zip(p_pred_cols, clf_lin.coef_, clf_log.coef_): - coefs += [[ collection, a, b, c ]] - coefs = pd.DataFrame(coefs, columns=['collection','col','lin_coef','log_coef']) - # coefs['feature'] = coefs.col.apply(lambda x: ' '.join(re.split('_', x)[:-1]).title() ) - # coefs['feature'] = coefs.col.apply(lambda x: '_'.join(re.split('_', x)[:-1]) ) - # coefs['value'] = coefs.col.apply(lambda x: re.split('_', x)[-1] ) - # mn = coefs.groupby('feature')[[ 'lin_coef','log_coef' ]].min().reset_index() - # mn.columns = [ 'feature','mn_lin_coef','mn_log_coef' ] - # coefs = coefs.merge(mn) - # coefs['lin_coef'] = coefs.lin_coef - coefs.mn_lin_coef - # coefs['log_coef'] = coefs.log_coef - coefs.mn_log_coef - # coefs - # g = attributes[ attributes.collection == collection ][[ 'feature','value','rarity' ]].drop_duplicates() - # g['value'] = g.value.astype(str) - # len(coefs) - # g = coefs.merge(g, how='left') - # g[g.rarity.isnull()] - # len(g) - # coefs = coefs.merge( m_df[ m_df.collection == collection ][[ 'feature_name','' ]] ) - # coefs.sort_values('lin_coef').tail(20) - - # TODO: pick the most common one and have that be the baseline - most_common = attributes[(attributes.collection == collection)].sort_values('rarity', ascending=0).groupby('feature').head(1) - most_common['col'] = most_common.apply(lambda x: 'std_{}_{}'.format( re.sub(' ', '_', x['feature'].lower()), x['value'] ), 1 ) - mc = most_common.col.unique() - data = [] - for c0 in std_pred_cols_0: - if c0 in ['std_rank','std_score','std_pct','std_timestamp','std_mn_20','std_log_mn_20']: - continue - f = '_'.join(re.split('_', c0)[1:-1]) - v = re.split('_', c0)[-1] - rarity = p_metadata[p_metadata['{}_{}'.format(f, v)]==1]['{}_pct'.format(f)].values[0] - # avg = p_metadata['{}_pct'.format(f)].mean() - # avg_pct = df.pct.mean() - # pct_std = ((avg_pct * r / avg) - avg_pct) / df.pct.std() - r = df[df['{}_{}'.format(f, v)]==1].std_rank.mean() - s = df[df['{}_{}'.format(f, v)]==1].std_score.mean() - if r == r and s == s: - datum = [ c0, rarity ] - for c1 in std_pred_cols: - datum.append(1 if c1 == c0 else r if c1 == 'std_rank' else s if c1 == 'std_score' else 1 if c1 in mc else 0 ) - data += [ datum ] - - importance 
= pd.DataFrame(data, columns=['feature','rarity']+std_pred_cols) - sorted(importance.feature.unique()) - importance[importance.feature == 'std_fur_/_skin_Leopard'] - if 'std_timestamp' in df.columns: - importance['std_timestamp'] = df.std_timestamp.max() - # importance['pred_lin'] = clf_lin.predict( importance[std_pred_cols].values ) - # importance['pred_log'] = np.exp(clf_log.predict( importance[std_pred_cols].values )) - - importance['pred_lin'] = clf_lin.predict(importance[std_pred_cols].values) - importance['pred_lin'] = importance.pred_lin.apply(lambda x: max(0, x) + l) - # importance['pred_lin'] = importance.pred_lin.apply(lambda x: x + l) - importance['pred_log'] = clf_log.predict(importance[std_pred_cols].values) - importance['pred_log'] = importance.pred_log.apply(lambda x: max(1, x)) * l - # importance['pred_log'] = importance.pred_log.apply(lambda x: x) * l - - importance['pred'] = clf.predict( importance[[ 'pred_lin','pred_log' ]].values ) - # importance['pred'] = np.exp( (sd * model.predict(importance[std_pred_cols].values)) + mu) - importance = importance.sort_values('pred', ascending=0) - importance.head()[['feature','pred']] - importance[importance.feature == 'std_fur_/_skin_Leopard'] - importance['feature'] = importance.feature.apply(lambda x: re.sub('std_', '', x)) - importance['value'] = importance.feature.apply(lambda x: re.split('_', x)[-1]) - importance['feature'] = importance.feature.apply(lambda x: '_'.join(re.split('_', x)[:-1])) - mn = importance.groupby('feature').pred.min().reset_index().rename(columns={'pred':'baseline'}) - importance = importance.merge(mn) - importance['pred_vs_baseline'] = importance.pred - importance.baseline - importance['pct_vs_baseline'] = (importance.pred / importance.baseline) - 1 - importance[(importance.feature == 'fur_/_skin')].sort_values('pred')[['value','rarity','pred','pred_lin','pred_log','std_rank','std_score']].sort_values('rarity') - importance['collection'] = collection - importance.sort_values('pct_vs_baseline')[['feature','value','pct_vs_baseline']] - tmp = importance[std_pred_cols].mean().reset_index() - tmp.columns = [ 'a', 'b' ] - tmp = tmp.sort_values('b') - feature_values = feature_values.append(importance[['collection','feature','value','pred','pred_vs_baseline','pct_vs_baseline','rarity']]) - -attributes['feature'] = attributes.feature.apply(lambda x: re.sub('_', ' ', x).title() ) -feature_values['feature'] = feature_values.feature.apply(lambda x: re.sub('_', ' ', x).title() ) - -pred_price = pred_price[[ 'collection', 'contract_address', 'token_id', 'hri_rank', 'rk', 'pred_price', 'pred_sd' ]] - +attributes['feature_name'] = attributes.feature_name.apply(lambda x: re.sub('_', ' ', x).title() ) +sorted(attributes['feature_name'].unique()) +if len(feature_values): + feature_values['feature_name'] = feature_values.feature_name.apply(lambda x: re.sub('_', ' ', x).title() ) coefsdf.to_csv('./data/coefsdf.csv', index=False) salesdf.to_csv('./data/model_sales.csv', index=False) @@ -511,24 +361,6 @@ pred_price.to_csv('./data/pred_price.csv', index=False) attributes.to_csv('./data/attributes.csv', index=False) feature_values.to_csv('./data/feature_values.csv', index=False) -pred_price = pd.read_csv('./data/pred_price.csv') -tokens = pd.read_csv('./data/tokens.csv') -rem = tokens[tokens.clean_token_id>=10000].token_id.unique() -l0 = len(pred_price) -pred_price = pred_price[ -((pred_price.collection == 'LunaBulls') & (pred_price.token_id.isin(rem))) ] -l1 = len(pred_price) -pred_price.to_csv('./data/pred_price.csv', 
index=False) - -# excludedf.to_csv('./data/excludedf.csv', index=False) -# listings = pd.read_csv('./data/listings.csv') -# listings['token_id'] = listings.token_id.astype(int) - -# tmp = salesdf.merge(attributes[ (attributes.collection == 'thugbirdz') & (attributes.feature == 'Position In Gang') & (attributes.value == 'Underboss') ]) -# tmp = pred_price.merge(attributes[ (attributes.collection == 'thugbirdz') & (attributes.feature == 'Position In Gang') & (attributes.value == 'Underboss') ]) -# tmp['token_id'] = tmp.token_id.astype(int) -# tmp = tmp.merge(listings[['collection','token_id','price']]) -# tmp.sort_values('pred_price', ascending=0) - if CHECK_EXCLUDE: salesdf['rat'] = salesdf.price / salesdf.pred salesdf['dff'] = salesdf.price - salesdf.pred @@ -542,9 +374,3 @@ if CHECK_EXCLUDE: print(salesdf.exclude.mean()) salesdf[salesdf.token_id == '2239'][['collection','price','exclude']] salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False) - -attributes[ (attributes.collection == 'thugbirdz') & (attributes.token_id == '1869') ] -feature_values[ (feature_values.collection == 'thugbirdz') & (feature_values.feature == 'position_in_gang') ] -sorted(feature_values[ (feature_values.collection == 'thugbirdz') ].feature.unique()) - -pred_price[pred_price.collection == 'peskypenguinclub'].head() \ No newline at end of file diff --git a/solana_model_old.py b/solana_model_old.py new file mode 100644 index 00000000..f4002140 --- /dev/null +++ b/solana_model_old.py @@ -0,0 +1,574 @@ +import os +import re +import json +import warnings +import requests +import numpy as np +import pandas as pd +import urllib.request +import tensorflow as tf +import snowflake.connector +from datetime import datetime +from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import LinearRegression, RidgeCV, Lasso +from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV + +warnings.filterwarnings('ignore') + +os.chdir('/Users/kellenblumberg/git/nft-deal-score') + +CHECK_EXCLUDE = False +CHECK_EXCLUDE = True + +# Using sales from howrare.is - the last sale that was under 300 was when the floor was at 72. 
Filtering for when the floor is >100, the lowest sale was 400 + +################################### +# Define Helper Functions # +################################### +def standardize_df(df, cols, usedf=None, verbose=False): + for c in cols: + if type(usedf) != type(pd.DataFrame()): + usedf = df + mu = usedf[c].mean() + sd = usedf[c].std() + if verbose: + print(c) + if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0: + df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 ) + else: + df['std_{}'.format(c)] = (df[c] - mu) / sd + return(df) + +def just_float(x): + x = re.sub('[^\d\.]', '', str(x)) + return(float(x)) + +def calculate_percentages(df, cols=[]): + add_pct = not 'pct' in df.columns + if not len(cols): + cols = df.columns + if add_pct: + df['pct'] = 1 + for c in cols: + g = df[c].value_counts().reset_index() + g.columns = [ c, 'N' ] + col = '{}_pct'.format(c) + g[col] = g.N / g.N.sum() + df = df.merge( g[[ c, col ]] ) + if add_pct: + df['pct'] = df.pct * df[col] + return(df) + +exclude = [ + # (collection, token_id, price) + ( 'aurory', 2239, 3500 ) + # ( 'aurory', 856, 150 ) + # ( 'aurory', 4715, 500 ) + # ( 'aurory', 5561, 298 ) + # ( 'aurory', 5900, 199 ) + # ( 'aurory', 3323, 138 ) +] +s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'}) +s_df[ s_df.collection == 'Levana Dragons' ].sort_values('block_timestamp', ascending=0).head() +print(len(s_df[s_df.collection == 'Levana Dragon Eggs'])) +print(s_df.groupby('collection').token_id.count()) +s_df.collection.unique() +s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])] +s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]] +s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ] +for e in exclude: + s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )] +s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ] + +# exclude wierd data points +if not CHECK_EXCLUDE: + exclude = pd.read_csv('./data/exclude.csv') + s_df = s_df.merge(exclude, how='left') + s_df = s_df[s_df.exclude.isnull()] + del s_df['exclude'] + +m_df = pd.read_csv('./data/metadata.csv') +m_df['token_id'] = m_df.token_id.astype(str) +tmp = m_df[m_df.collection.isin(['Levana Dragon Eggs','Levana Meteors','Levana Dust'])] +tmp['tmp'] = tmp.token_id.astype(int) +tmp.groupby('collection').tmp.max() +m_df.head() +# s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(x[:10], '%Y-%m-%d %H:%M:%S') ) +s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') ) +s_df['timestamp'] = s_df.block_timestamp.astype(int) +# del metadata['price'] +# del metadata['last_sale'] +s_df = s_df.sort_values(['collection','block_timestamp']) +s_df['mn_20'] = s_df.groupby('collection').price.shift(1) +s_df = s_df.sort_values(['collection','block_timestamp']) +s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) +s_df[[ 'block_timestamp','days_ago' ]].drop_duplicates(subset=['days_ago']) + +s_df['av_20'] = s_df.groupby('collection')['mn_20'].rolling(20).mean().reset_index(0,drop=True) +s_df = s_df.sort_values(['collection','block_timestamp']) +# s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).median().reset_index(0,drop=True) +s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) 
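(Illustrative aside, not part of the patch.) The block above estimates a per-collection floor from prior sales: shift the price so a sale never informs its own floor, then take a low rolling quantile over the last 20 prior sales and drop sales far below it. A minimal sketch on made-up data, with a window of 3 instead of 20; prev_price and floor are stand-in names for the script's mn_20 / md_20:

import pandas as pd

sales = pd.DataFrame({
    'collection': ['smb'] * 6,
    'block_timestamp': pd.date_range('2021-12-01', periods=6, freq='D'),
    'price': [10, 11, 9, 30, 10, 12],
})
sales = sales.sort_values(['collection', 'block_timestamp'])
# prior sale price within the collection
sales['prev_price'] = sales.groupby('collection').price.shift(1)
# low-end rolling quantile of prior prices serves as the floor estimate
sales['floor'] = (sales.groupby('collection')['prev_price']
                       .rolling(3).quantile(0.1)
                       .reset_index(0, drop=True))
# drop sales far below the estimated floor, as the script does with md_20
sales = sales[(sales.floor.isnull()) | (sales.price >= sales.floor * 0.75)]
print(sales)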
+# s_df[ (-((s_df.price) >= (s_df.md_20 * 0.2))) & (s_df.price.notnull()) & (s_df.collection == 'Levana Dragon Eggs') ] + +s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.75) ] +s_df = s_df.sort_values(['collection','block_timestamp']) +s_df['mn_20'] = s_df.groupby('collection').price.shift(1) +s_df = s_df.sort_values(['collection','block_timestamp']) +# s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).min().reset_index(0,drop=True) +s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True) +s_df.sort_values(['collection','block_timestamp'])[['price','mn_20','block_timestamp']].head(21).tail(40) +s_df.sort_values(['collection','block_timestamp'])[['price','mn_20','block_timestamp']].head(20).sort_values('price') +s_df['tmp'] = s_df.mn_20 / s_df.md_20 + +tmp = s_df[s_df.collection=='smb'][['mn_20','block_timestamp']] +tmp['date'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] ) +tmp = tmp.groupby('date').mn_20.median().reset_index() +tmp.to_csv('~/Downloads/tmp.csv', index=False) + +s_df['tmp'] = s_df.price / s_df.mn_20 +s_df[s_df.collection == 'smb'].sort_values('block_timestamp')[['token_id','price','mn_20']] +s_df[s_df.collection == 'smb'].sort_values('tmp').head(20)[['collection','token_id','price','mn_20','tmp']] +s_df.groupby('collection').tmp.median() +s_df.groupby('collection').tmp.mean() + +s_df.sort_values('tmp').head() +s_df['tmp'] = s_df.price / s_df.mn_20 +s_df[['collection','token_id','block_timestamp','price','mn_20','md_20','av_20','tmp']].to_csv('~/Downloads/tmp.csv', index=False) +s_df.groupby('collection').tmp.median() +s_df.groupby('collection').tmp.mean() +s_df.sort_values('tmp', ascending=0).head() +s_df.head(21) +m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ] +# m_df['feature_value'] = m_df.feature_value.apply(lambda x: x.strip() ) +# m_df.feature_value.unique() +pred_cols = {} +metadata = {} +sales = {} +collection_features = {} +m_df[(m_df.collection == 'Galactic Punks') & (m_df.feature_name == 'pct')].sort_values('token_id') +c = 'Levana Dragon Eggs' +# pred_cols[c] +EXCLUDE_COLS = { + # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature'] + 'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count'] +} +NUMERIC_COLS = { + 'Levana Dragon Eggs': ['rank','score','pct','collection_rank','weight','temperature'] +} +for c in s_df.collection.unique(): + print('Building {} model'.format(c)) + exclude = EXCLUDE_COLS[c] if c in EXCLUDE_COLS.keys() else [] + n_cols = NUMERIC_COLS[c] if c in NUMERIC_COLS.keys() else [] + exclude = [ x for x in exclude if not x in n_cols ] + o_cols = sorted([x for x in m_df[ m_df.collection == c ].feature_name.unique() if (not x in exclude) and not (x in n_cols) ]) + + sales[c] = s_df[ s_df.collection == c ] + pred_cols[c] = sorted( n_cols + o_cols ) + collection_features[c] = [ c for c in pred_cols[c] if not c in ['score','rank','pct']+exclude ] + metadata[c] = m_df[ (m_df.collection == c) & (-(m_df.feature_name.isin(exclude))) ] + + # tmp = pd.pivot_table( metadata[c], ['collection','token_id'], columns=['feature_name'], values=['feature_value'] ) + metadata[c] = metadata[c].pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index() + metadata[c].columns = [ 'collection','token_id' ] + pred_cols[c] + + features = collection_features[c] + cur = metadata[c] + # cur = cur.dropna(subset=features) + 
+    for f in features:
+        if type(cur[f].values[0]) == str:
+            cur[f] = cur[f].apply(lambda x: re.sub("\"", "", str(x) ) )
+            cur[f] = cur[f].apply(lambda x: re.split("\(", x )[0].strip())
+    cur = cur.replace('', 'Default')
+    # if not 'pct' in cur.columns:
+    cur = calculate_percentages( cur, o_cols )
+    dummies = pd.get_dummies(cur[o_cols])
+    # feature_cols = dummies.columns
+    cur = pd.concat([ cur.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
+    metadata[c] = cur
+    # pred_cols[c] = ['rank','score','timestamp','mn_20','log_mn_20'] + list(dummies.columns)
+    # cols = [ 'collection_rank' ]
+    # cols = [ ]
+    # pred_cols[c] = [ 'rank','transform_rank','score'] + n_cols + [x for x in cols if x in m_df.feature_name.unique()] + list(dummies.columns)
+    # pred_cols[c] = [ 'rank','transform_rank','score'] + n_cols + list(dummies.columns)
+    pred_cols[c] = n_cols + list(dummies.columns)
+
+# collection_features = {
+# 'Hashmasks': [ 'character','eyecolor','item','mask','skincolor' ]
+# , 'Galactic Punks': [ 'backgrounds','hair','species','suits','jewelry','headware','glasses' ]
+# , 'Solana Monkey Business': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
+# , 'Aurory': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
+# # , 'Thugbirdz': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
+# }
+
+coefsdf = pd.DataFrame()
+salesdf = pd.DataFrame()
+attributes = pd.DataFrame()
+pred_price = pd.DataFrame()
+feature_values = pd.DataFrame()
+collections = sorted(metadata.keys())
+collection = 'Galactic Punks'
+tokens = pd.read_csv('./data/tokens.csv')
+collection = 'Levana Dragon Eggs'
+# for collection in s_df.collection.unique():
+for collection in ['Levana Dragon Eggs']:
+    # collection = 'LunaBulls'
+    # collection = 'smb'
+    # collection = 'aurory'
+    # collection = 'meerkatmillionaires'
+    print('Working on collection {}'.format(collection))
+    p_metadata = metadata[collection]
+    if 'attribute_count' in p_metadata.columns:
+        p_metadata['attribute_count'] = p_metadata.attribute_count.astype(float).astype(int)
+
+    p_sales = sales[collection]
+    # specify the predictive features
+    p_pred_cols = pred_cols[collection]
+    if collection == 'Levana Dragon Eggs':
+        p_pred_cols += [ 'transformed_collection_rank' ]
+    p_features = collection_features[collection]
+    p_sales['token_id'] = p_sales.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
+    p_metadata['token_id'] = p_metadata.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
+    for c in [ 'rank','score' ]:
+        p_metadata[c] = p_metadata[c].astype(float)
+    # p_sales['contract_address'] = p_sales.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
+    # p_metadata['contract_address'] = p_metadata.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
+    p_sales['contract_address'] = ''
+    p_metadata['contract_address'] = ''
+
+    # remove 1 column for each group (since they are collinear)
+    # exclude = []
+    # for f in p_features:
+    # e = [ c for c in p_pred_cols if c[:len(f)] == f ][-1]
+    # exclude.append(e)
+
+    df = p_sales.merge(p_metadata, on=['token_id','contract_address'])
+    df = df[df.mn_20.notnull()]
+    target_col = 'adj_price'
+    df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 )
+    # df['mn_20'] = df.apply(lambda x: min(x[target_col], x['mn_20']), 1 )
+    # tmp = df[['block_timestamp','mn_20']].copy()
+    # tmp['tmp'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] )
+    # tmp = tmp.groupby('tmp').mn_20.median().reset_index()
+    # 
tmp.sort_values('tmp').to_csv('~/Downloads/tmp.csv', index=False) + # df['timestamp'] = df.block_timestamp.astype(int) + df = df[df[target_col].notnull()] + df = df.reset_index(drop=True) + df['transform_rank'] = df['rank'].apply(lambda x: 1.0 / (x**2) ) + df['rel_price_0'] = df[target_col] - df.mn_20 + df['rel_price_1'] = df[target_col] / df.mn_20 + df = df[df.mn_20 > 0] + df['log_mn_20'] = np.log(df.mn_20) + print('Training on {} sales'.format(len(df))) + # df['price_median'] = df.groupby('token_id').price.median() + + # standardize columns to mean 0 sd 1 + len(p_pred_cols) + n_cols = NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else [] + for c in n_cols: + df[c] = df[c].apply(lambda x: just_float(x) ) + if collection == 'Levana Dragon Eggs': + df['transformed_collection_rank'] = df.collection_rank.apply(lambda x: (1.0/ x)**2 ) + df = standardize_df(df, p_pred_cols) + std_pred_cols_0 = [ 'std_{}'.format(c) for c in p_pred_cols ] + # p_pred_cols = [ c for c in p_pred_cols if not c in exclude ] + std_pred_cols = [ 'std_{}'.format(c) for c in p_pred_cols ] + df['log_price'] = df[target_col].apply(lambda x: np.log(x) ) + # df.sort_values('block_timestamp').head(10)[['price','tx_id']] + # df.sort_values('block_timestamp').head(10)[['price','tx_id']].tx_id.values + # df = df[df.price >= 1] + + ######################### + # Run the Model # + ######################### + len(df) + len(df.dropna(subset=std_pred_cols)) + tmp = df[std_pred_cols].count().reset_index() + tmp.columns = ['a','b'] + tmp.sort_values('b').head(20) + rem = list(tmp[tmp.b==0].a.values) + std_pred_cols = [ c for c in std_pred_cols if not c in rem ] + mn = df.timestamp.min() + mx = df.timestamp.max() + df['weight'] = df.timestamp.apply(lambda x: 2.5 ** ((x - mn) / (mx - mn)) ) + X = df[std_pred_cols].values + mu = df.log_price.mean() + sd = df.log_price.std() + df['std_log_price'] = (df.log_price - mu) / sd + # y = df.std_log_price.values + # y = df[target_col].values + # y = df.rel_price_1.values + y_0 = df.rel_price_0.values + y_1 = df.rel_price_1.values + # y_log = df.log_price.values + + clf_lin = Lasso() if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)]) + clf_lin.fit(X, y_0, df.weight.values) + coefs = [] + for a, b in zip(std_pred_cols, clf_lin.coef_): + coefs += [[a,b]] + coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0) + coefs.to_csv('~/Downloads/tmp.csv', index=False) + df['pred_lin'] = clf_lin.predict(X) + df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20 + df['err_lin'] = abs(((df.pred_lin - df[target_col]) / df[target_col]) ) + # df['err_lin'] = abs(df.pred_lin - df.price ) + # df[[ 'price','pred_lin','err_lin','mn_20' ]].sort_values('err_lin').tail(50) + df.head() + clf_log = Lasso() if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)]) + clf_log.fit(X, y_1, df.weight.values) + coefs = [] + for a, b in zip(std_pred_cols, clf_log.coef_): + coefs += [[a,b]] + coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0) + coefs.to_csv('~/Downloads/tmp.csv', index=False) + df['pred_log'] = clf_log.predict(X) + df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20 + df['err_log'] = abs(((df.pred_log - df[target_col]) / df[target_col]) ) + df[[ target_col,'pred_log','err_log','mn_20' ]].sort_values('err_log').tail(50) + df['err'] = df.err_lin * df.err_log + + df[[ target_col,'pred_log','err_log','err_lin','err','mn_20' 
]].sort_values('err').tail(50) + df['collection'] = collection + + # df['pred_lin'] = clf_lin.predict(X) + # df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20 + # df['pred_log'] = np.exp(clf_log.predict(X)) + # df['pred_log'] = clf_log.predict(X) + # df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20 + clf = LinearRegression(fit_intercept=False) + clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.weight.values ) + print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) )) + l = df.sort_values('block_timestamp', ascending=0).mn_20.values[0] + tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], l]], columns=['collection','lin_coef','log_coef','floor_price']) + if clf.coef_[0] < 0: + print('Only using log') + df['pred'] = df.pred_log + tmp['lin_coef'] = 0 + tmp['log_coef'] = 1 + elif clf.coef_[1] < 0: + print('Only using lin') + df['pred'] = df.pred_lin + tmp['lin_coef'] = 1 + tmp['log_coef'] = 0 + else: + print('Only using BOTH!') + df['pred'] = clf.predict( df[['pred_lin','pred_log']].values ) + coefsdf = coefsdf.append(tmp) + df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) ) + df[df.block_timestamp>='2021-10-01'].sort_values('err', ascending=0).head(10)[[ 'pred',target_col,'token_id','block_timestamp','err','mn_20' ]] + # df[df.block_timestamp>='2021-10-01'].err.mean() + df.merge(tokens[['collection','token_id','clean_token_id']]).sort_values('err', ascending=0).head(10)[[ 'pred',target_col,'clean_token_id','rank','block_timestamp','err','mn_20','tx_id' ]] + df.sort_values('price', ascending=0).head(20)[[ 'price','pred',target_col,'token_id','block_timestamp','err','mn_20','tx_id' ]] + df.sort_values('price', ascending=0).tail(40)[[ 'price','pred',target_col,'token_id','block_timestamp','err','mn_20','tx_id' ]] + df.sort_values('price', ascending=0).head(20).tx_id.values + + # print(np.mean(y)) + # print(np.mean(clf.predict(X))) + + # # run neural net + # model = tf.keras.models.Sequential([ + # tf.keras.layers.Dense(9, activation='relu') + # , tf.keras.layers.Dropout(.2) + # , tf.keras.layers.Dense(3, activation='relu') + # , tf.keras.layers.Dropout(.2) + # , tf.keras.layers.Dense(1, activation='linear') + # ]) + # model.compile(loss='mae', optimizer=tf.keras.optimizers.SGD(learning_rate=0.0025)) + # model.fit(X, y, epochs=500, validation_split=0.3) + + # df['pred'] = np.exp( (sd * model.predict(df[std_pred_cols].values)) + mu) + # df['pred'] = model.predict(df[std_pred_cols].values) + # ratio = df.price.mean() / df.pred.mean() + # print("Manually increasing predictions by {}%".format(round((ratio-1) * 100, 1))) + + # checking errors + # df['pred'] = df.pred * ratio + df['err'] = df[target_col] - df.pred + df['q'] = df.pred.rank() * 10 / len(df) + df['q'] = df.q.apply(lambda x: int(round(x)) ) + df['pct_err'] = (df[target_col] / df.pred) - 1 + pe_mu = df.pct_err.mean() + pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) ].pct_err.std() + pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) & (df.days_ago<=50) ].pct_err.std() + df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) ) + df['pred_sd'] = df.pred * pe_sd + print(df.groupby('q')[['err','pred',target_col]].mean()) + print(df[df.weight >= df.weight.median()].groupby('q')[['err','pred',target_col]].mean()) + # df.err.mean() + # df[df.weight >= 3.5].err.mean() + df['collection'] = collection + print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2))) + salesdf = 
salesdf.append( df[[ 'collection','contract_address','token_id','block_timestamp','price','pred','mn_20','rank','score' ]].sort_values('block_timestamp', ascending=0) ) + + # create the attributes dataframe + for f in p_features: + if f and '{}_pct'.format(f) in p_metadata.columns: + cur = p_metadata[[ 'token_id', f, '{}_pct'.format(f) ]] + cur.columns = [ 'token_id', 'value','rarity' ] + cur['feature'] = f + cur['collection'] = collection + attributes = attributes.append(cur) + + # create predictions for each NFT in the collection + test = p_metadata.copy() + for c in n_cols: + test[c] = test[c].apply(lambda x: just_float(x) ) + if collection in [ 'Levana Dragon Eggs' ]: + test['transformed_collection_rank'] = test.collection_rank.apply(lambda x: (1.0 / x) ** 2 ) + tail = df.sort_values('timestamp').tail(1) + for c in [ 'std_timestamp','mn_20','log_mn_20' ]: + if c in tail.columns: + test[c] = tail[c].values[0] + test = standardize_df(test, [c for c in p_pred_cols if not c in ['timestamp'] ], df, True) + # test['pred_lin'] = clf_lin.predict( test[std_pred_cols].values ) + # test['pred_log'] = np.exp(clf_log.predict( test[std_pred_cols].values )) + + test['pred_lin'] = clf_lin.predict(test[std_pred_cols].values) + test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l) + # test['pred_lin'] = df.pred_lin + df.mn_20 + # df['pred_log'] = np.exp(clf_log.predict(X)) + test['pred_log'] = clf_log.predict(test[std_pred_cols].values) + test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l + + test['pred'] = clf.predict( test[[ 'pred_lin','pred_log' ]].values ) + # test['pred'] = np.exp( (sd * model.predict(test[std_pred_cols].values)) + mu) * ratio + test['pred_price'] = test.pred#.apply(lambda x: x*(1+pe_mu) ) + if not CHECK_EXCLUDE: + test['pred_price'] = test.pred.apply(lambda x: (x*0.985) ) + test['pred_sd'] = test.pred * pe_sd + test['rk'] = test.pred.rank(ascending=0, method='first') + test['collection'] = collection + pred_price = pred_price.append( test[[ 'collection', 'contract_address','token_id','rank','rk','pred_price','pred_sd' ] + p_features].rename(columns={'rank':'hri_rank'}).sort_values('pred_price') ) + # print(test[[ 'contract_address','token_id','pred_price','pred_sd' ]].sort_values('pred_price')) + + + ############################## + # Feature Importance # + ############################## + coefs = [] + for a, b, c in zip(p_pred_cols, clf_lin.coef_, clf_log.coef_): + coefs += [[ collection, a, b, c ]] + coefs = pd.DataFrame(coefs, columns=['collection','col','lin_coef','log_coef']) + # coefs['feature'] = coefs.col.apply(lambda x: ' '.join(re.split('_', x)[:-1]).title() ) + # coefs['feature'] = coefs.col.apply(lambda x: '_'.join(re.split('_', x)[:-1]) ) + # coefs['value'] = coefs.col.apply(lambda x: re.split('_', x)[-1] ) + # mn = coefs.groupby('feature')[[ 'lin_coef','log_coef' ]].min().reset_index() + # mn.columns = [ 'feature','mn_lin_coef','mn_log_coef' ] + # coefs = coefs.merge(mn) + # coefs['lin_coef'] = coefs.lin_coef - coefs.mn_lin_coef + # coefs['log_coef'] = coefs.log_coef - coefs.mn_log_coef + # coefs + # g = attributes[ attributes.collection == collection ][[ 'feature','value','rarity' ]].drop_duplicates() + # g['value'] = g.value.astype(str) + # len(coefs) + # g = coefs.merge(g, how='left') + # g[g.rarity.isnull()] + # len(g) + # coefs = coefs.merge( m_df[ m_df.collection == collection ][[ 'feature_name','' ]] ) + # coefs.sort_values('lin_coef').tail(20) + + # TODO: pick the most common one and have that be the baseline + most_common = 
attributes[(attributes.collection == collection)].sort_values('rarity', ascending=0).groupby('feature').head(1) + most_common['col'] = most_common.apply(lambda x: 'std_{}_{}'.format( re.sub(' ', '_', x['feature'].lower()), x['value'] ), 1 ) + mc = most_common.col.unique() + data = [] + for c0 in std_pred_cols_0: + if c0 in ['std_rank','std_score','std_pct','std_timestamp','std_mn_20','std_log_mn_20']: + continue + f = '_'.join(re.split('_', c0)[1:-1]) + v = re.split('_', c0)[-1] + rarity = p_metadata[p_metadata['{}_{}'.format(f, v)]==1]['{}_pct'.format(f)].values[0] + # avg = p_metadata['{}_pct'.format(f)].mean() + # avg_pct = df.pct.mean() + # pct_std = ((avg_pct * r / avg) - avg_pct) / df.pct.std() + r = df[df['{}_{}'.format(f, v)]==1].std_rank.mean() + s = df[df['{}_{}'.format(f, v)]==1].std_score.mean() + if r == r and s == s: + datum = [ c0, rarity ] + for c1 in std_pred_cols: + datum.append(1 if c1 == c0 else r if c1 == 'std_rank' else s if c1 == 'std_score' else 1 if c1 in mc else 0 ) + data += [ datum ] + + importance = pd.DataFrame(data, columns=['feature','rarity']+std_pred_cols) + sorted(importance.feature.unique()) + importance[importance.feature == 'std_fur_/_skin_Leopard'] + if 'std_timestamp' in df.columns: + importance['std_timestamp'] = df.std_timestamp.max() + # importance['pred_lin'] = clf_lin.predict( importance[std_pred_cols].values ) + # importance['pred_log'] = np.exp(clf_log.predict( importance[std_pred_cols].values )) + + importance['pred_lin'] = clf_lin.predict(importance[std_pred_cols].values) + importance['pred_lin'] = importance.pred_lin.apply(lambda x: max(0, x) + l) + # importance['pred_lin'] = importance.pred_lin.apply(lambda x: x + l) + importance['pred_log'] = clf_log.predict(importance[std_pred_cols].values) + importance['pred_log'] = importance.pred_log.apply(lambda x: max(1, x)) * l + # importance['pred_log'] = importance.pred_log.apply(lambda x: x) * l + + importance['pred'] = clf.predict( importance[[ 'pred_lin','pred_log' ]].values ) + # importance['pred'] = np.exp( (sd * model.predict(importance[std_pred_cols].values)) + mu) + importance = importance.sort_values('pred', ascending=0) + importance.head()[['feature','pred']] + importance[importance.feature == 'std_fur_/_skin_Leopard'] + importance['feature'] = importance.feature.apply(lambda x: re.sub('std_', '', x)) + importance['value'] = importance.feature.apply(lambda x: re.split('_', x)[-1]) + importance['feature'] = importance.feature.apply(lambda x: '_'.join(re.split('_', x)[:-1])) + mn = importance.groupby('feature').pred.min().reset_index().rename(columns={'pred':'baseline'}) + importance = importance.merge(mn) + importance['pred_vs_baseline'] = importance.pred - importance.baseline + importance['pct_vs_baseline'] = (importance.pred / importance.baseline) - 1 + importance[(importance.feature == 'fur_/_skin')].sort_values('pred')[['value','rarity','pred','pred_lin','pred_log','std_rank','std_score']].sort_values('rarity') + importance['collection'] = collection + importance.sort_values('pct_vs_baseline')[['feature','value','pct_vs_baseline']] + tmp = importance[std_pred_cols].mean().reset_index() + tmp.columns = [ 'a', 'b' ] + tmp = tmp.sort_values('b') + feature_values = feature_values.append(importance[['collection','feature','value','pred','pred_vs_baseline','pct_vs_baseline','rarity']]) + +attributes['feature'] = attributes.feature.apply(lambda x: re.sub('_', ' ', x).title() ) +feature_values['feature'] = feature_values.feature.apply(lambda x: re.sub('_', ' ', x).title() ) + +pred_price = 
pred_price[[ 'collection', 'contract_address', 'token_id', 'hri_rank', 'rk', 'pred_price', 'pred_sd' ]] + + +coefsdf.to_csv('./data/coefsdf.csv', index=False) +salesdf.to_csv('./data/model_sales.csv', index=False) +pred_price.to_csv('./data/pred_price.csv', index=False) +attributes.to_csv('./data/attributes.csv', index=False) +feature_values.to_csv('./data/feature_values.csv', index=False) + +pred_price = pd.read_csv('./data/pred_price.csv') +tokens = pd.read_csv('./data/tokens.csv') +rem = tokens[tokens.clean_token_id>=10000].token_id.unique() +l0 = len(pred_price) +pred_price = pred_price[ -((pred_price.collection == 'LunaBulls') & (pred_price.token_id.isin(rem))) ] +l1 = len(pred_price) +pred_price.to_csv('./data/pred_price.csv', index=False) + +# listings = pd.read_csv('./data/listings.csv') +# listings['token_id'] = listings.token_id.astype(int) + +# tmp = salesdf.merge(attributes[ (attributes.collection == 'thugbirdz') & (attributes.feature == 'Position In Gang') & (attributes.value == 'Underboss') ]) +# tmp = pred_price.merge(attributes[ (attributes.collection == 'thugbirdz') & (attributes.feature == 'Position In Gang') & (attributes.value == 'Underboss') ]) +# tmp['token_id'] = tmp.token_id.astype(int) +# tmp = tmp.merge(listings[['collection','token_id','price']]) +# tmp.sort_values('pred_price', ascending=0) + +if CHECK_EXCLUDE: + salesdf['rat'] = salesdf.price / salesdf.pred + salesdf['dff'] = salesdf.price - salesdf.pred + salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int) + salesdf['rat'] = salesdf.pred / salesdf.price + salesdf['dff'] = salesdf.pred - salesdf.price + salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int) + salesdf['exclude'] = (salesdf.exclude_1 + salesdf.exclude_2).apply(lambda x: int(x>0)) + print(salesdf.exclude_1.mean()) + print(salesdf.exclude_2.mean()) + print(salesdf.exclude.mean()) + salesdf[salesdf.token_id == '2239'][['collection','price','exclude']] + salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False) + +attributes[ (attributes.collection == 'thugbirdz') & (attributes.token_id == '1869') ] +feature_values[ (feature_values.collection == 'thugbirdz') & (feature_values.feature == 'position_in_gang') ] +sorted(feature_values[ (feature_values.collection == 'thugbirdz') ].feature.unique()) + +pred_price[pred_price.collection == 'peskypenguinclub'].head() \ No newline at end of file diff --git a/viz/server.R b/viz/server.R index 75f01eba..6e7c14b6 100644 --- a/viz/server.R +++ b/viz/server.R @@ -45,7 +45,7 @@ server <- function(input, output, session) { selectInput( inputId = 'collectionname' , label = NULL - , selected = 'LunaBulls' + , selected = 'Levana Dragon Eggs' , choices = choices , width = "100%" ) @@ -197,24 +197,27 @@ server <- function(input, output, session) { return(head(attributes, 0)) } cur <- attributes[ token_id == eval(as.numeric(id)) & collection == eval(selected) ] - cur <- merge( cur, feature_values[collection == eval(selected), list(feature, value, pred_vs_baseline, pct_vs_baseline) ], all.x=TRUE ) + # cur <- merge( cur, feature_values[collection == eval(selected), list(feature_name, feature_value, pred_vs_baseline, pct_vs_baseline) ], all.x=TRUE ) cur <- 
cur[order(rarity)] - floor <- getFloors()[2] - log_coef <- coefsdf[ collection == eval(selected) ]$log_coef[1] - lin_coef <- coefsdf[ collection == eval(selected) ]$lin_coef[1] - s <- sum(cur$pct_vs_baseline) - p <- getPredPrice() - p <- as.numeric(p[ token_id == eval(as.numeric(id)) ]$pred_price) - # p <- pred_price[ token_id == eval(as.numeric(id)) & collection == eval(selected) ]$pred_price - ratio <- (p / floor) - 1 - ratio <- pmax(0, ratio) - if (ratio > 0 & length(ratio) > 0) { - mult <- ratio / s - cur[, pct_vs_baseline := pct_vs_baseline * eval(mult) ] - } - cur[, vs_baseline := round((pred_vs_baseline * eval(lin_coef)) + (pct_vs_baseline * eval(floor) * eval(log_coef) ), 1) ] - cur[, pred_vs_baseline := round(pred_vs_baseline, 1) ] - cur[, vs_baseline := round(pred_vs_baseline + (pct_vs_baseline * eval(floor)), 1) ] + # floor <- getFloors()[2] + # log_coef <- coefsdf[ collection == eval(selected) ]$log_coef[1] + # lin_coef <- coefsdf[ collection == eval(selected) ]$lin_coef[1] + # s <- sum(cur$pct_vs_baseline) + # p <- getPredPrice() + # p <- as.numeric(p[ token_id == eval(as.numeric(id)) ]$pred_price) + # # p <- pred_price[ token_id == eval(as.numeric(id)) & collection == eval(selected) ]$pred_price + # ratio <- (p / floor) - 1 + # ratio <- pmax(0, ratio) + # if (ratio > 0 & length(ratio) > 0) { + # mult <- ratio / s + # cur[, pct_vs_baseline := pct_vs_baseline * eval(mult) ] + # } + cur[, vs_baseline := 0 ] + cur[, pred_vs_baseline := 0 ] + cur[, vs_baseline := 0 ] + # cur[, vs_baseline := round((pred_vs_baseline * eval(lin_coef)) + (pct_vs_baseline * eval(floor) * eval(log_coef) ), 1) ] + # cur[, pred_vs_baseline := round(pred_vs_baseline, 1) ] + # cur[, vs_baseline := round(pred_vs_baseline + (pct_vs_baseline * eval(floor)), 1) ] return(cur) }) @@ -223,9 +226,11 @@ server <- function(input, output, session) { if( nrow(data) == 0 ) { return(NULL) } - data[, rarity := paste0(format(round(rarity*100, 2), digits=4, decimal.mark="."),'%') ] + data[, rarity := ifelse(is.na(rarity), '', paste0(format(round(rarity*100, 2), digits=4, decimal.mark="."),'%') )] + # reactable(data[, list( feature, value, rarity, vs_baseline, pred_vs_baseline, pct_vs_baseline )], - data <- data[, list( feature, value, rarity, pct_vs_baseline )] + # data <- data[, list( feature, value, rarity, pct_vs_baseline )] + data <- data[, list( feature_name, feature_value, rarity )] reactable(data, defaultColDef = colDef( headerStyle = list(background = "#10151A") @@ -234,17 +239,17 @@ server <- function(input, output, session) { borderless = TRUE, outlined = FALSE, columns = list( - feature = colDef(name = "Attribute", align = "left"), - value = colDef(name = "Value", align = "left"), - rarity = colDef(name = "Rarity", align = "left"), - pct_vs_baseline = colDef( - name="Value", header=with_tooltip("Value", "The estimated price impact of this feature vs the floor") - , html = TRUE - , align = "left" - , cell = function(x) { - htmltools::tags$span(paste0('+', format(round(x*1000)/10, digits=4, decimal.mark=".", big.mark=","), '%')) - } - ) + feature_name = colDef(name = "Attribute", align = "left"), + feature_value = colDef(name = "Value", align = "left"), + rarity = colDef(name = "Rarity", align = "left") + # pct_vs_baseline = colDef( + # name="Value", header=with_tooltip("Value", "The estimated price impact of this feature vs the floor") + # , html = TRUE + # , align = "left" + # , cell = function(x) { + # htmltools::tags$span(paste0('+', format(round(x*1000)/10, digits=4, decimal.mark=".", big.mark=","), '%')) + # 
} + # ) ) ) }) @@ -255,7 +260,7 @@ server <- function(input, output, session) { return(NULL) } data <- feature_values[ collection == eval(selected)] - reactable(data[, list( feature, value, rarity, pct_vs_baseline )], + reactable(data[, list( feature_name, feature_value, rarity, pct_vs_baseline )], defaultColDef = colDef( headerStyle = list(background = "#10151A") ), @@ -263,8 +268,8 @@ server <- function(input, output, session) { outlined = FALSE, searchable = TRUE, columns = list( - feature = colDef(name = "Attribute", align = "left"), - value = colDef(name = "Value", align = "left"), + feature_name = colDef(name = "Attribute", align = "left"), + feature_value = colDef(name = "Value", align = "left"), rarity = colDef(name = "Rarity", align = "left", cell = function(x) { htmltools::tags$span(paste0(format(x*100, digits=3, decimal.mark=".", big.mark=","),'%')) }), @@ -504,7 +509,9 @@ server <- function(input, output, session) { df[, deal_score := round(pmin( 100, pmax(0, deal_score) )) ] df[, deal_score := pnorm(price, pred_price, eval(SD_SCALE) * pred_sd * pred_price / pred_price_0), by = seq_len(nrow(df)) ] df[, deal_score := round(100 * (1 - deal_score)) ] - df[, pred_price := round(pred_price) ] + # df[, pred_price := round(pred_price) ] + df[, pred_price := paste0(format(round(pred_price, 1), digits=3, decimal.mark=".", big.mark=",")) ] + df <- df[, list(token_id, price, pred_price, deal_score)] df <- df[order(-deal_score)] return(df) @@ -517,7 +524,7 @@ server <- function(input, output, session) { if( nrow(df) == 0 ) { return(NULL) } - df <- df[ deal_score >= 10 ] + df <- df[ deal_score >= 0 ] df[, hover_text := paste0('#',token_id,'
Listing Price: ',price,'
Fair Market Price: ',pred_price,'
Deal Score: ',deal_score) ] fig <- plot_ly( diff --git a/viz/ui.R b/viz/ui.R index 0220f8cc..1c335fbd 100644 --- a/viz/ui.R +++ b/viz/ui.R @@ -102,17 +102,33 @@ fluidPage( , fluidRow( class="grey8row" , h2("Listings", icon(class="padding-left-10", id="listings-tooltip", "info-circle")) - , bsTooltip(id = "listings-tooltip", title = "Plot only shows listings with deal score > 10; Click a dot to select the token", placement = "bottom", trigger = "hover") + , bsTooltip(id = "listings-tooltip", title = "Plot only shows listings with deal score > 5; Click a dot to select the token", placement = "bottom", trigger = "hover") , div( class = "listing-plot" , plotlyOutput("listingplot", height = 500) - , div(class='description', 'Plot only shows listings with deal score > 10') + , div(class='description', 'Plot only shows listings with deal score > 5') , div(class='description', 'Click a dot to select the token') ) , div(class = "table", reactableOutput("listingtable")) , div(class = "description", 'This app is still in beta - listings updates will be periodic (but at least 3x a week)') , div(class = "link", uiOutput('listingurl')) ) + , fluidRow( + class="grey8row faq" + , h2("FAQ") + , h4("What is NFT Deal Score?") + , div("We use historical sales data to determine the values and the rankings of each NFT.") + , h4("Why is this rank different?") + , div("Although rarity is a feature in our model, it is not just a rarity-based ranking. Certain features are put at a higher premium on the secondary marketplace, and this ranking reflects that.") + , h4("Why are the rarity %s different?") + , div("Our %s reflect only the NFTs in existence. Other tools may include more theoretical numbers.") + , h4("How does the model work?") + , div("Each attribute is an input into the model. We are working to add better model explanations to the tool.") + , h4("How often is the data updated?") + , div("Listings are updated 3x / week. 
Model is updated weekly.") + , h4("Where can I send my questions?") + , div(a(class="", href="https://twitter.com/nftdealscore", "@nftdealscore"), " on Twitter") + ) , fluidRow( class="grey8row" , h2("NFT Rankings", icon(class="padding-left-10", id="nft-rankings-tooltip", "info-circle")) @@ -127,11 +143,11 @@ fluidPage( , div(class = "table", reactableOutput("salestable")) , div(class = "description", 'This app is still in beta - sales data may be incomplete or delayed') ) - , fluidRow( - class="grey8row" - , h2("Feature Summary", icon(class="padding-left-10", id="feature-summary-tooltip", "info-circle")) - , bsTooltip(id = "feature-summary-tooltip", title = "Shows the rarity and estimated price impact of each feature", placement = "bottom", trigger = "hover") - , div(class = "table", reactableOutput("featurestable")) - , div(class = "description", 'Shows the rarity and estimated price impact of each feature') - ) + # , fluidRow( + # class="grey8row" + # , h2("Feature Summary", icon(class="padding-left-10", id="feature-summary-tooltip", "info-circle")) + # , bsTooltip(id = "feature-summary-tooltip", title = "Shows the rarity and estimated price impact of each feature", placement = "bottom", trigger = "hover") + # , div(class = "table", reactableOutput("featurestable")) + # , div(class = "description", 'Shows the rarity and estimated price impact of each feature') + # ) ) diff --git a/viz/www/styles.css b/viz/www/styles.css index 9a58a652..70ee1e53 100644 --- a/viz/www/styles.css +++ b/viz/www/styles.css @@ -264,6 +264,15 @@ tr { } +/****************/ +/* FAQ */ +/****************/ +.faq > h4 { + font-size: 22px; + padding-top: 32px; +} + + /*******************/ /* General */ /*******************/