Stoned Ape Crew

flipside-kellen 2022-02-21 08:10:04 -08:00
parent 2129a728d0
commit 3675831f4b
15 changed files with 4515 additions and 65 deletions

1
.gitignore vendored
View File

@@ -10,3 +10,4 @@
data/*
*.twb
viz/.DS_Store
data/mints/*.json

View File

@@ -8,6 +8,7 @@ import requests
import pandas as pd
import urllib.request
import snowflake.connector
from utils import clean_name, clean_token_id
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
@@ -41,6 +42,20 @@ def clean_colnames(df):
df.columns = names
return(df)
def add_collection_steps():
# 1. mint_address_token_id_map
# 2. scrape metadata
metadata = pd.read_csv('./data/metadata.csv')
metadata['collection'] = metadata.collection.apply(lambda x: clean_name(x) )
sorted(metadata.collection.unique())
metadata.to_csv('./data/metadata.csv', index=False)
metadata[metadata.collection == 'Stoned Ape Crew']
metadata[metadata.collection == 'Stoned Ape Crew'].feature_name.unique()
# 3. scrape howrareis
# 4. add sales
# 5. run model
pass
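A minimal sketch of how the five numbered steps above might be chained, ignoring that the pieces live in different modules; scrape_howrareis is an assumed name for step 3, which this diff does not show, while the other calls are functions defined elsewhere in this commit:
def add_collection():
    mint_address_token_id_map_2()    # 1. map mint addresses to token ids
    metadata_from_uri()              # 2. scrape metadata from each token's uri
    # scrape_howrareis()             # 3. assumed helper, not shown here
    add_solana_sales()               # 4. pull sales from Snowflake
    train_model(False, True)         # 5. fit the pricing model (see the model script below)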
def manual_clean():
for c in [ 'pred_price', 'attributes', 'feature_values', 'model_sales', 'listings', 'coefsdf', 'tokens' ]:
df = pd.read_csv('./data/{}.csv'.format(c))
@@ -49,7 +64,32 @@ def manual_clean():
df['clean_token_id'] = df.token_id
df.to_csv('./data/{}.csv'.format(c), index=False)
def add_solana_sales():
def mint_address_token_id_map_2():
old = pd.read_csv('./data/mint_address_token_id_map.csv')
old = pd.DataFrame()
mints = pd.read_csv('./data/solana_mints.csv')
data = []
for collection in [ 'Stoned Ape Crew','DeGods' ]:
for m in mints[mints.collection == collection].mint_address.unique():
pass
f = open('./data/mints/{}/{}.json'.format(collection, m))
j = json.load(f)
try:
token_id = int(re.split('#', j['name'])[1])
data += [[ collection, m, token_id, j['uri'] ]]
except:
print(m)
df = pd.DataFrame(data, columns=['collection','mint','token_id','uri'])
old = old.append(df).drop_duplicates()
print(old[old.token_id.notnull()].groupby('collection').token_id.count())
old.to_csv('./data/mint_address_token_id_map.csv', index=False)
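# note: token ids are parsed from on-chain names of the form 'Stoned Ape #2732' (see the metadata JSON later in this diff); mints whose name lacks a '#<id>' suffix are printed and skipped above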
def mint_address_token_id_map():
mints = pd.read_csv('./data/solana_mints.csv')
mints[mints.collection == 'Stoned Ape Crew'][['mint_address']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
mints[mints.collection == 'Degods'][['mint_address']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
mints[mints.collection == 'DeGods'][['mint_address']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
old = pd.read_csv('./data/mint_address_token_id_map.csv')
my_file = open('./scripts/solana-rpc-app/output.txt', 'r')
content = my_file.read()
my_file.close()
@@ -60,9 +100,20 @@ def add_solana_sales():
if len(s) > 1 and '#' in s[1]:
data += [[ re.split('"', s[0])[1], int(re.split('#', re.split('"', s[1])[1])[1]) ]]
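# each output.txt record is assumed to hold two quoted fields -- the mint address and a name like 'DeGods #123' -- so the splits above pull out the mint and the numeric id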
df = pd.DataFrame(data, columns=['mint','token_id']).drop_duplicates()
df['collection'] = 'DeGods'
df.to_csv('./data/mint_address_token_id_map.csv', index=False)
def add_solana_sales():
print('Adding Solana sales...')
# read id map
id_map = pd.read_csv('./data/mint_address_token_id_map.csv')
id_map['collection'] = id_map.collection.apply(lambda x: clean_name(x) )
id_map.collection.unique()
query = '''
SELECT tx_id
, n.mint
, l.project_name
, n.block_timestamp AS sale_date
, (inner_instruction:instructions[0]:parsed:info:lamports
+ inner_instruction:instructions[1]:parsed:info:lamports
@@ -72,16 +123,23 @@ def add_solana_sales():
LEFT JOIN crosschain.address_labels l ON LOWER(n.mint) = LOWER(l.address)
WHERE block_timestamp >= CURRENT_DATE - 200
AND instruction:data like '3UjLyJvuY4%'
AND l.project_name ilike 'degods'
AND l.project_name IN ('degods','stoned ape crew')
'''
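# the lamports sum above is presumably divided by 1e9 in the truncated part of the SELECT (1 SOL = 1,000,000,000 lamports); the '3UjLyJvuY4%' data prefix appears to select Magic Eden sale instructions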
sales = ctx.cursor().execute(query)
sales = pd.DataFrame.from_records(iter(sales), columns=[x[0] for x in sales.description])
sales = clean_colnames(sales)
print('Queried {} sales'.format(len(sales)))
sales['chain'] = 'Solana'
sales['collection'] = 'DeGods'
m = sales.merge(df, how='left', on=['mint'])
sales['collection'] = sales.project_name.apply(lambda x: clean_name(x) )
# m = sales.merge(id_map, how='left', on=['mint','collection'])
m = sales.merge(id_map, how='inner', on=['mint','collection'])
m.sort_values('collection')
m = m[[ 'collection','token_id','sale_date','price','chain' ]]
s_df = pd.read_csv('./data/sales.csv')
if 'collection_x' in s_df.columns and 'collection_y' in s_df.columns:
s_df['collection'] = s_df.collection.fillna(s_df.collection_x).fillna(s_df.collection_y)
del s_df['collection_x']
del s_df['collection_y']
l0 = len(s_df)
s_df = s_df[-s_df.collection.isin(sales.collection.unique())]
s_df = s_df.append(m)
@@ -91,6 +149,8 @@ def add_solana_sales():
for c in [ 'mint','tmp' ]:
if c in s_df:
del s_df[c]
if 'project_name' in s_df.columns:
del s_df['project_name']
s_df.to_csv('./data/sales.csv', index=False)
pass
@@ -428,7 +488,7 @@ def add_terra_sales():
WHEN 'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2' THEN 'LunaBulls'
WHEN 'terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k' THEN 'Galactic Punks'
WHEN 'terra1vhuyuwwr4rkdpez5f5lmuqavut28h5dt29rpn6' THEN 'Levana Dragons'
WHEN 'terra1p70x7jkqhf37qa7qm4v23g4u4g8ka4ktxudxa7' THEN 'Levana Meteor Dust'
WHEN 'terra1p70x7jkqhf37qa7qm4v23g4u4g8ka4ktxudxa7' THEN 'Levana Dust'
WHEN 'terra1k0y373yxqne22pc9g7jvnr4qclpsxtafevtrpg' THEN 'Levana Eggs'
WHEN 'terra14gfnxnwl0yz6njzet4n33erq5n70wt79nm24el' THEN 'Levana Loot'
WHEN 'terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v' THEN 'Levana Meteors'
@@ -555,6 +615,7 @@ def add_terra_sales():
# tokens = pd.read_csv('./data/tokens.csv')
# tokens['tmp'] = tokens.token_id.apply(lambda x: (str(x)[:5]))
# tokens[tokens.collection == 'Galactic Punks'].to_csv('~/Downloads/tmp.csv', index=False)
sales.tokenid.values[:4]
sales['tokenid'] = sales.tokenid.apply(lambda x: str(int(float(x))) )
# tokens['token_id'] = tokens.token_id.astype(str)
@@ -569,6 +630,10 @@ def add_terra_sales():
, 'amount': 'price'
, 'tokenid': 'token_id'
})
sales = clean_token_id(sales)
sales.token_id.values[:4]
# sales['token_id'] = sales.token_id.astype(int)
# tmp = sales.merge(tokens[['collection','token_id','clean_token_id']])
# sales[sales.tx_id.isin(['6CA1966B42D02F07D1FB6A839B8276D501FDF3EF048DECA5601C74D82EBB9D12',
# 'F5643C0C805F3236F67CFF1A6AC1FC50CF9DB61B846B3CE6F9D4CD3806284D4E',
@@ -577,9 +642,9 @@ def add_terra_sales():
# sales.columns
sales['chain'] = 'Terra'
sales = sales[[ 'chain','collection','token_id','sale_date','price','tx_id' ]]
print(sales.groupby(['chain','collection']).token_id.count())
sales['token_id'] = sales.token_id.apply(lambda x: re.sub('"', '', x) )
sales['collection'] = sales.collection.apply(lambda x: 'Levana Dragon Eggs' if x=='Levana Eggs' else x )
# print(sales.groupby(['chain','collection']).token_id.count())
# sales['token_id'] = sales.token_id.apply(lambda x: re.sub('"', '', x) )
# sales['collection'] = sales.collection.apply(lambda x: 'Levana Dragon Eggs' if x=='Levana Eggs' else x )
old = pd.read_csv('./data/sales.csv')
# print(old.groupby(['chain','collection']).token_id.count())
l0 = len(old)
@@ -588,6 +653,7 @@ def add_terra_sales():
old = old[ -(old.collection.isin(sales.collection.unique())) ]
old = old.append(sales)
old = old[[ 'chain','collection','token_id','sale_date','price','tx_id' ]]
# old['collection'] = old.collection.apply(lambda x: 'Levana Dust' if x == 'Levana Meteor Dust' else x )
old = old.drop_duplicates(subset=['collection','token_id','price'])
# old = old[-(old.collection == 'Levana Dragons')]
# old = old[-(old.collection == 'Levana Dragon Eggs')]

View File

Binary file not shown.

View File

@@ -28,12 +28,15 @@ def add_att_count():
m_df = pd.read_csv('./data/metadata.csv')
l0 = len(m_df)
solana_rarities = pd.read_csv('./data/solana_rarities.csv')
tokens = pd.read_csv('./data/tokens.csv')[['collection','token_id','nft_rank']]
solana_rarities = pd.read_csv('./data/solana_rarities.csv')
lp_ranks = pd.read_csv('./data/lp_ranks.csv')
gp_ranks = pd.read_csv('./data/gp_ranks.csv')
lev_egg_ranks = m_df[m_df.feature_name == 'collection_rank'][['collection','token_id','feature_value']].rename(columns={'feature_value':'nft_rank'})
lev_egg_ranks['nft_rank'] = lev_egg_ranks.nft_rank.astype(int)
if False:
metadata = pd.read_csv('./data/metadata.csv')
levana_ranks = metadata[(metadata.collection == 'Levana Dragon Eggs') & (metadata.feature_name == 'collection_rank')]
@@ -49,9 +52,13 @@ if False:
metadata['chain'] = metadata.collection.apply(lambda x: 'Terra' if x in ['LunaBulls','Galactic Punks','Levana Dragon Eggs'] else 'Solana' )
metadata.to_csv('./data/metadata.csv', index=False)
rarities = solana_rarities.append(lp_ranks).append(gp_ranks).append(tokens).drop_duplicates(keep='first')
rarities = rarities[[ 'collection','token_id','nft_rank' ]]
rarities = solana_rarities.append(lp_ranks).append(gp_ranks).append(lev_egg_ranks).append(tokens)[[ 'collection','token_id','nft_rank' ]].dropna().drop_duplicates(subset=['collection','token_id'], keep='first')
rarities[rarities.collection == 'Levana Dragon Eggs']
# rarities = rarities[[ 'collection','token_id','nft_rank' ]].dropna()
rarities['collection'] = rarities.collection.apply(lambda x: clean_name(x) )
# rarities[rarities.nft_rank.isnull()]
# solana_rarities[solana_rarities.nft_rank.isnull()]
rarities['nft_rank'] = rarities.nft_rank.astype(int)
# rarities[ (rarities.collection == 'Solana Monkey Business') & (rarities.token_id == 903) ]
rarities.loc[ (rarities.collection == 'Solana Monkey Business') & (rarities.token_id == 903) , 'nft_rank' ] = 18
rarities['adj_nft_rank_0'] = rarities.nft_rank.apply(lambda x: (x+1) ** -0.2 )
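# (x+1) ** -0.2 decays slowly with rank: rank 1 -> ~0.87, rank 10000 -> ~0.16, so rarer (lower-rank) tokens get larger feature values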
@@ -104,10 +111,15 @@ print(m_df[(m_df.token_id=='10') & (m_df.collection == 'Aurory')])
m_df['feature_value'] = m_df.feature_value.apply(lambda x: x.strip() if type(x) == str else x )
m_df['chain'] = m_df.collection.apply(lambda x: 'Terra' if x in ['LunaBulls','Galactic Punks','Levana Dragon Eggs'] else 'Solana' )
m_df.to_csv('./data/metadata.csv', index=False)
g = m_df[['collection','token_id']].drop_duplicates().groupby('collection').token_id.count().reset_index()
a = m_df.groupby('collection').token_id.count().reset_index().rename(columns={'token_id':'atts'})
g = g.merge(a)
g['rat'] = g.atts / g.token_id
print(g)
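# sanity check: rat = metadata rows per distinct token, i.e. the average attribute count per NFT in each collection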
l1 = len(m_df)
m_df[m_df.collection == 'Levana Dragon Eggs'].feature_name.unique()
print('Adding {} rows'.format(l1 - l0))
m_df[m_df.collection == 'Levana Dragon Eggs']
m_df.to_csv('./data/metadata.csv', index=False)

0
saved_params.pkl Normal file
View File

View File

@@ -19,7 +19,7 @@ from selenium.webdriver.common.keys import Keys
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
os.environ['PATH'] += os.pathsep + '/Users/kellenblumberg/shared/'
from utils import merge, clean_name
from utils import clean_token_id, merge, clean_name
# browser = webdriver.Chrome()
@@ -43,6 +43,69 @@ def scrape_magic_eden_sales():
df = pd.DataFrame([ x['parsedTransaction'] for x in results if 'parsedTransaction' in x.keys()])
df[[ 'blockTime','collection_symbol','total_amount' ]]
def metadata_from_uri():
df = pd.read_csv('./data/mint_address_token_id_map.csv')
tokens = pd.read_csv('./data/tokens.csv')
tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x))
tokens[tokens.collection == 'Stoned Ape Crew']
sorted(tokens.collection.unique())
collections = [ 'Stoned Ape Crew' ]
data = []
t_data = []
# seen = [ x[1] for x in t_data]
for collection in collections:
print(collection)
it = 0
cur = df[df.collection == collection]
seen = []
for row in cur.iterrows():
it += 1
if it % 250 == 2:
print('{} {} {}'.format(it, len(data), len(t_data)))
row = row[1]
uri = row['uri']
token_id = row['token_id']
if token_id in seen:
continue
try:
j = requests.get(uri, timeout=3).json()
for a in j['attributes']:
data += [[ collection, token_id, a['trait_type'], a['value'] ]]
t_data += [[ collection, token_id, j['image'] ]]
seen.append(token_id)
except:
print(row['uri'])
old = pd.read_csv('./data/metadata.csv')
l0 = len(old)
metadata = pd.DataFrame(data, columns=['collection','token_id','feature_name','feature_value'])
metadata['chain'] = 'Solana'
old['token_id'] = old.token_id.astype(str)
metadata['token_id'] = metadata.token_id.astype(str)
old = old.append(metadata).drop_duplicates(subset=['collection','token_id','feature_name'])
l1 = len(old)
print('Adding {} rows to metadata'.format(l1 - l0))
# old['chain'] = old.collection.apply(lambda x: 'Terra' if x in ['Galactic Punks','Levana Dragon Eggs','LunaBulls'] else 'Solana')
print(old.groupby(['chain','collection']).token_id.count())
old.to_csv('./data/metadata.csv', index=False)
old = pd.read_csv('./data/tokens.csv')
l0 = len(old)
tokens = pd.DataFrame(t_data, columns=['collection','token_id','image_url'])
old['collection'] = old.collection.apply(lambda x: clean_name(x))
old['token_id'] = old.token_id.astype(str)
tokens['token_id'] = tokens.token_id.astype(str)
old = old.merge(tokens, how='left', on=['collection','token_id'])
old['image_url'] = old.image_url_y.fillna(old.image_url_x)
del old['image_url_x']
del old['image_url_y']
tmp = old[old.collection == 'Stoned Ape Crew']
tmp['tmp'] = tmp.image_url.apply(lambda x: x[:20] )
tmp.groupby('tmp').token_id.count()
old = old.drop_duplicates(subset=['collection','token_id'], keep='last')
l1 = len(old)
print('Adding {} rows to tokens'.format(l1 - l0))
old.to_csv('./data/tokens.csv', index=False)
def scrape_not_found(browser):
url = 'https://notfoundterra.com/lunabulls'
browser.get(url)
@@ -198,7 +261,8 @@ def scrape_randomearth(browser):
for i in j['items']:
data += [[ 'Terra', collection, i['token_id'], i['price'] / (10 ** 6) ]]
df = pd.DataFrame(data, columns=['chain','collection','token_id','price'])
df.to_csv('~/Downloads/tmp.csv', index=False)
df = clean_token_id(df)
# df.to_csv('~/Downloads/tmp.csv', index=False)
old = pd.read_csv('./data/listings.csv')
old = old[-old.collection.isin(df.collection.unique())]
old = old.append(df)
@@ -382,7 +446,7 @@ def scrape_solanafloor():
df.to_csv('./data/sf_projects.csv', index=False)
def scrape_listings(browser, collections = [ 'degods','aurory','thugbirdz','smb','degenapes','peskypenguinclub' ], alerted = [], is_listings = True):
def scrape_listings(browser, collections = [ 'stoned-ape-crew','degods','aurory','thugbirdz','smb','degenapes','peskypenguinclub' ], alerted = [], is_listings = True):
print('Scraping solanafloor listings...')
data = []
m_data = []
@@ -468,7 +532,16 @@ def scrape_listings(browser, collections = [ 'degods','aurory','thugbirdz','smb'
# print(row.text)
scroll = browser.find_elements_by_class_name('ag-row-even')
j = min(j, len(scroll) - 1)
try:
browser.execute_script("arguments[0].scrollIntoView();", scroll[j] )
except:
sleep(1)
try:
browser.execute_script("arguments[0].scrollIntoView();", scroll[j] )
except:
sleep(10)
browser.execute_script("arguments[0].scrollIntoView();", scroll[j] )
sleep(.1)
next = browser.find_elements_by_class_name('ag-icon-next')
a = browser.find_element_by_id('ag-17-start-page-number').text
@@ -1045,6 +1118,7 @@ def metadata_from_solscan():
[ 'shadowy-super-coder', 'https://sld-gengo.s3.amazonaws.com/{}.json', 0, 10000 ]
, [ 'degods', 'https://sld-gengo.s3.amazonaws.com/{}.json', 1, 10000 ]
, [ 'balloonsville', 'https://bafybeih5i7lktx6o7rjceuqvlxmpqzwfh4nhr322wq5hjncxbicf4fbq2e.ipfs.dweb.link/{}.json', 0, 5000 ]
, [ 'Stoned Ape Crew', 'https://bafybeih5i7lktx6o7rjceuqvlxmpqzwfh4nhr322wq5hjncxbicf4fbq2e.ipfs.dweb.link/{}.json', 0, 5000 ]
]
data = []
token_data = []
@@ -1134,6 +1208,11 @@ def scrape_mints():
mints = pd.DataFrame()
auth_to_mint = {}
# metaboss -r https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/ decode mint --list-file ./data/mints/etc/degods.json -o ~/Downloads/degods
# metaboss -r https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/ derive metadata ChANfqf7AP9x1rFRjZkE6n19u3tQckv65Z6r6xPqkRKR --output ~/Downloads
# metaboss decode mint --list-file <LIST_FILE> -o <OUTPUT_DIRECTORY>
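# metaboss 'decode mint' dumps each mint's on-chain Metaplex metadata account to a JSON file (like the 'Stoned Ape #2732' example later in this diff), which mint_address_token_id_map_2() then parses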
for collection, update_authority in d.items():
auth_to_mint[update_authority] = collection
for fname in [ './data/mints/'+f for f in os.listdir('./data/mints') ]:

27
scratch.py Normal file
View File

@@ -0,0 +1,27 @@
import os
import pandas as pd
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
COLLECTION = 'Stoned Ape Crew'
sales = pd.read_csv('./data/sales.csv')
sales = pd.read_csv('./data/model_sales.csv')
sorted(sales.collection.unique())
# sales = sales[sales.exclude == 0]
metadata = pd.read_csv('./data/metadata.csv')
metadata = metadata[metadata.collection == COLLECTION]
sales = sales[sales.collection == COLLECTION]
features = sorted(metadata.feature_name.unique())
metadata = metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
metadata.columns = [ 'collection','token_id' ] + features
sales['token_id'] = sales.token_id.astype(int)
metadata['token_id'] = metadata.token_id.astype(int)
df = sales.merge(metadata)
# df.sort_values('sale_date', ascending=0).head()
df = df.fillna('None')
df['id'] = range(len(df))  # sequential row id (a bare len(df) would assign the same value to every row)
df['rel_price_0'] = (df.price - df.mn_20).apply(lambda x: max(0, x))
df['rel_price_1'] = (df.price / df.mn_20).apply(lambda x: max(0, x-1))
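# rel_price_0 is the absolute premium over mn_20 (presumably a trailing-20-sale floor estimate), rel_price_1 the relative premium (price/floor - 1); both are clipped at 0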
df[ (df.rel_price_0.notnull()) ].to_csv('./data/tableau_data.csv', index=False)

View File

@@ -0,0 +1 @@
{"creators":[{"address":"7RCBr3ZQ8yhY4jHpFFo3Kmh7MnaCPi1bFuUgXUB9WURf","share":0,"verified":true},{"address":"PUFFgnKKhQ23vp8uSPwdzrUhEr7WpLmjM85NB1FQgpb","share":100,"verified":false}],"name":"Stoned Ape #2732","seller_fee_basis_points":742,"symbol":"SAC","uri":"https://arweave.net/EhC3DmvjOUP2346FiEjKhu_xTDvuLfOYMipZTQgg1F8"}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -2,7 +2,7 @@ import collections
import os
import re
import json
from textwrap import indent
import pickle
import warnings
import requests
import numpy as np
@@ -25,7 +25,6 @@ os.chdir('/Users/kellenblumberg/git/nft-deal-score')
warnings.filterwarnings('ignore')
###################################
# Define Helper Functions #
###################################
@@ -68,7 +67,8 @@ def get_sales(check_exclude = True, exclude=[]):
s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'})
s_df['token_id'] = s_df.token_id.astype(str)
s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x))
# s_df['collection'] = s_df.collection_x.fillna(s_df.collection_y).fillna(s_df.collection).apply(lambda x: clean_name(x))
s_df = s_df.drop_duplicates(subset=['token_id','collection','price'])
s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])]
s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ]
s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]]
@@ -125,7 +125,7 @@ def get_coefs(cols, coef):
coefs = coefs.sort_values('val', ascending=0)
return(coefs)
def train_model(check_exclude, supplement_with_listings):
def train_model(check_exclude, supplement_with_listings, use_saved_params=True):
exclude = [
( 'aurory', 2239, 3500 )
, ( 'aurory', 1876, 789 )
@@ -134,6 +134,7 @@ def train_model(check_exclude, supplement_with_listings):
, ( 'aurory', 9239, 1700 )
]
s_df = get_sales(check_exclude, exclude)
s_df[s_df.collection.isnull()]
# s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'})
# s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x))
# s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])]
@@ -215,8 +216,10 @@ def train_model(check_exclude, supplement_with_listings):
if supplement_with_listings:
pred_price = pd.read_csv('./data/pred_price.csv')
pred_price['collection'] = pred_price.collection.apply(lambda x: clean_name(x))
pred_price['token_id'] = pred_price.token_id.astype(str)
listings['collection'] = listings.collection.apply(lambda x: clean_name(x))
listings['block_timestamp'] = s_df.block_timestamp.max()
listings['token_id'] = listings.token_id.astype(str)
# listings = listings[listings.collection.isin(pred_price.collection.unique())]
floor = s_df.sort_values('timestamp').groupby('collection').tail(1)[['collection','mn_20']]
tmp = merge(listings, pred_price, ensure=False)
@@ -259,7 +262,7 @@ def train_model(check_exclude, supplement_with_listings):
ALL_NUMERIC_COLS = ['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2']
MODEL_EXCLUDE_COLS = {
# 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count','rarity_score_rank','rarity_score','weight']
'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count','rarity_score_rank','rarity_score','weight','collection_rank_group']
, 'Solana Monkey Business': ['Clothes_Diamond']
}
MODEL_INCLUDE_COLS = {
@@ -283,12 +286,25 @@ def train_model(check_exclude, supplement_with_listings):
# for collection in [ 'Solana Monkey Business' ]:
# for collection in [ 'Aurory' ]:
# for collection in [ 'Aurory','Solana Monkey Business' ]:
collections = list(s_df[['collection']].drop_duplicates().merge(m_df[['collection']].drop_duplicates()).collection.unique())
sorted(pred_price.collection.unique())
sorted(s_df.collection.unique())
print(sorted(m_df.collection.unique()))
# for collection in m_df.collection.unique():
for collection in [ 'DeGods' ]:
# for collection in s_df.collection.unique():
saved_params = {}
file_to_store = open('./objects/saved_params.pickle', 'rb')
saved_params = pickle.load(file_to_store)
collection = 'Aurory'
collection = 'Levana Dragon Eggs'
collection = 'Galactic Punks'
collection = 'Stoned Ape Crew'
collections = ['Levana Dragon Eggs']
collections = list(s_df[['collection']].drop_duplicates().merge(m_df[['collection']].drop_duplicates()).collection.unique())
collections = ['Stoned Ape Crew']
for collection in collections:
# if collection == 'Stoned Ape Crew':
# continue
if not collection in saved_params.keys():
saved_params[collection] = {}
coefsdf = coefsdf[coefsdf.collection != collection]
salesdf = salesdf[salesdf.collection != collection]
attributes = attributes[attributes.collection != collection]
@@ -297,6 +313,9 @@ def train_model(check_exclude, supplement_with_listings):
print('Working on collection {}'.format(collection))
sales = s_df[ s_df.collection == collection ]
metadata = m_df[ m_df.collection == collection ]
metadata = metadata[metadata.feature_name != 'Genesis Role?']
metadata[metadata.token_id=='1']
metadata[metadata.feature_name=='Genesis Role?'].feature_value.unique()
sorted(metadata.feature_name.unique())
# metadata.groupby(['feature_name','feature_value']).token_id.count().reset_index().to_csv('~/Downloads/tmp.csv', index=False)
# metadata[metadata.token_id == '1']
@@ -306,7 +325,7 @@ def train_model(check_exclude, supplement_with_listings):
metadata = metadata[-metadata.feature_name.isin(['rank','pct','ipfs_image'])]
metadata.feature_name.unique()
metadata[(metadata.token_id=='1') & (metadata.collection == 'Solana Monkey Business')]
print(sorted(metadata.feature_name.unique()))
# print(sorted(metadata.feature_name.unique()))
# categorize columns
all_names = sorted(metadata.feature_name.unique())
@@ -314,7 +333,7 @@ def train_model(check_exclude, supplement_with_listings):
num_features = sorted((NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []) + ALL_NUMERIC_COLS)
num_features = [ x for x in num_features if x in metadata.feature_name.unique() ]
num_metadata = metadata[metadata.feature_name.isin(num_features)]
print(sorted(num_metadata.feature_name.unique()))
# print(sorted(num_metadata.feature_name.unique()))
num_metadata[num_metadata.feature_name == 'nft_rank']
cat_features = sorted([ x for x in all_names if not x in (model_exclude + num_features) ])
cat_metadata = metadata[metadata.feature_name.isin(cat_features)]
@@ -326,7 +345,7 @@ def train_model(check_exclude, supplement_with_listings):
# create dummies for binary variables
cat_metadata = cat_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
cat_metadata.columns = [ 'collection','token_id' ] + cat_features
cat_metadata = calculate_percentages( cat_metadata, cat_features )
# cat_metadata = calculate_percentages( cat_metadata, cat_features )
dummies = pd.get_dummies(cat_metadata[cat_features])
# dummies.head(1).to_csv('~/Downloads/tmp2.csv', index=False)
if collection == 'Solana Monkey Business':
@@ -342,7 +361,7 @@ def train_model(check_exclude, supplement_with_listings):
del dummies['matching_black']
cat_metadata = pd.concat([ cat_metadata.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
del cat_metadata['pct']
# del cat_metadata['pct']
for c in model_exclude:
if c in dummies.columns:
@@ -358,7 +377,7 @@ def train_model(check_exclude, supplement_with_listings):
assert(len(df.columns) < 1000)
# test dataFrame
ensure = not collection in ['Aurory']
ensure = not collection in ['Aurory','Stoned Ape Crew']
test = merge(num_metadata, cat_metadata, ['collection','token_id'], ensure=False)
if collection == 'Solana Monkey Business':
@@ -394,6 +413,8 @@ def train_model(check_exclude, supplement_with_listings):
else:
tmp['is_top_{}'.format(i)] = (tmp.nft_rank <= i).astype(int)
pred_cols += [ 'is_top_100','is_top_250','is_top_1000' ]
if 'collection_rank' in pred_cols:
pred_cols = [ x for x in pred_cols if not x in ['nft_rank'] ]
df.sort_values('price', ascending=0)[['price']].head(20)
# df.groupby(['rarity','weight']).price.mean()
@@ -422,8 +443,8 @@ def train_model(check_exclude, supplement_with_listings):
tmp.sort_values('b').head(20)
rem = list(tmp[tmp.b==0].a.values)
std_pred_cols = [ c for c in std_pred_cols if not c in rem ]
if collection == 'Levana Dragon Eggs':
std_pred_cols = [ 'std_essence_Dark','std_collection_rank_group_0','std_rarity_Legendary','std_rarity_Rare','std_rarity_Ancient','std_collection_rank','std_transformed_collection_rank' ]
# if collection == 'Levana Dragon Eggs':
# std_pred_cols = [ 'std_essence_Dark','std_collection_rank_group_0','std_rarity_Legendary','std_rarity_Rare','std_rarity_Ancient','std_collection_rank','std_transformed_collection_rank' ]
mn = df.timestamp.min()
mx = df.timestamp.max()
df['wt'] = df.timestamp.apply(lambda x: 3.0 ** ((x - mn) / (mx - mn)) )
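# recency weighting: wt ramps smoothly from 1.0 for the oldest sale to 3.0 for the newest (3 ** fraction-of-time-span elapsed)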
@@ -433,18 +454,18 @@ def train_model(check_exclude, supplement_with_listings):
# df['wt'] = df.price.apply(lambda x: 1.0 / (x ** 0.9) )
# df.sort_values('price', ascending=0)[['price','wt']].head(20)
# std_pred_cols = [ 'std_Hat_Crown','std_adj_nft_rank_0','std_Hat_None','std_Eyes_None','std_Clothes_None','std_Attribute Count_4','std_Mouth_None','std_adj_nft_rank_1','std_Type_Dark','std_Ears_None','std_Background_Light purple','std_Hat_Black Fedora 2','std_Hat_White Fedora 2','std_Attribute Count_0','std_Type_Skeleton','std_Attribute Count_2','std_Attribute Count_1','std_Hat_Protagonist Black Hat','std_Clothes_Sailor Vest','std_Mouth_Pipe','std_Hat_Protagonist White Hat','std_Clothes_Pirate Vest','std_Hat_Roman Helmet','std_Type_Solana','std_Clothes_Beige Smoking','std_Hat_Military Helmet','std_Hat_White Fedora 1','std_naked_1_att','std_Type_Zombie','std_Clothes_Roman Armor','std_Eyes_3D Glasses','std_Clothes_Orange Kimono','std_Hat_Green Punk Hair','std_Hat_Sombrero','std_Clothes_Military Vest','std_Hat_Space Warrior Hair','std_Hat_Blue Punk Hair','std_Clothes_Orange Jacket','std_Ears_Earing Silver','std_Eyes_Laser Eyes','std_Eyes_Vipers','std_Type_Alien','std_Type_Red','std_Hat_Admiral Hat' ]
cur_std_pred_cols = [ 'std_adj_nft_rank_0','std_Hat_Crown','std_adj_nft_rank_1','std_Type_Skeleton','std_Type_Alien','std_Clothes_None','std_Eyes_Vipers','std_Hat_Space Warrior Hair','std_Type_Zombie','std_Clothes_Pirate Vest','std_Clothes_Orange Kimono','std_Eyes_Laser Eyes','std_Type_Solana','std_Hat_Ninja Bandana','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Attribute Count_0','std_Attribute Count_1','std_Attribute Count_2','std_Attribute Count_3','std_Attribute Count_5','std_Hat_Strawhat','std_Hat_Admiral Hat','std_matching_top','std_Hat_Sombrero','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_None' ]
cur_std_pred_cols = deepcopy(std_pred_cols)
g = df[std_pred_cols].sum().reset_index()
g.columns = [ 'col','cnt' ]
g = g.sort_values('cnt')
g.head(20)
# cur_std_pred_cols = [ 'std_adj_nft_rank_0','std_Hat_Crown','std_adj_nft_rank_1','std_Type_Skeleton','std_Type_Alien','std_Clothes_None','std_Eyes_Vipers','std_Hat_Space Warrior Hair','std_Type_Zombie','std_Clothes_Pirate Vest','std_Clothes_Orange Kimono','std_Eyes_Laser Eyes','std_Type_Solana','std_Hat_Ninja Bandana','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Attribute Count_0','std_Attribute Count_1','std_Attribute Count_2','std_Attribute Count_3','std_Attribute Count_5','std_Hat_Strawhat','std_Hat_Admiral Hat','std_matching_top','std_Hat_Sombrero','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_None' ]
# cur_std_pred_cols = deepcopy(std_pred_cols)
# g = df[std_pred_cols].sum().reset_index()
# g.columns = [ 'col','cnt' ]
# g = g.sort_values('cnt')
# g.head(20)
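# NB: 'Solana Monkey Busines' (sic) below never matches 'Solana Monkey Business', so this token-903 override is effectively disabled; the same rank fix is applied upstream in the rarities table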
if collection == 'Solana Monkey Busines':
df.loc[ df.token_id == '903', 'nft_rank' ] = 18
df[df.token_id=='903']
df[df.token_id==903]
df = df.reset_index(drop=True)
X = df[cur_std_pred_cols].values
X = df[std_pred_cols].values
y_0 = df.rel_price_0.values
y_1 = df.rel_price_1.values
@@ -480,11 +501,13 @@ def train_model(check_exclude, supplement_with_listings):
y_val_rar_adj = df[rar_adj_target_col].values
models = ['las','ridge'] if target_col == 'rel_price_1' else ['las','ridge','rfr']
for model in models:
cur_std_pred_cols = std_pred_cols
cur_std_pred_cols = deepcopy(std_pred_cols)
print(model)
y = y_val_rar_adj if model in ['rfr'] else y_val
col = 'y_pred_{}_{}'.format(model, it)
df, bst_p, bst_r = ku.get_bst_params( model, df, X, y, target_col, col, verbose = True, wt_col='wt' )
params = [saved_params[collection][col]] if col in saved_params[collection].keys() and use_saved_params else []
df, bst_p, bst_r = ku.get_bst_params( model, df, X, y, target_col, col, verbose = True, wt_col='wt', params = params )
saved_params[collection][col] = bst_p
# if model == 'ridge':
# while len(cur_std_pred_cols) > 50:
@@ -506,9 +529,10 @@ def train_model(check_exclude, supplement_with_listings):
cur_std_pred_cols = [ c for c in coefs[coefs.val >= 0 ].col.unique() ]
X_new = df[cur_std_pred_cols].values
clf.fit(X_new, y)
# df, bst_p, bst_r = ku.get_bst_params( model, df, df[cur_std_pred_cols].values, y, target_col, col, verbose = True, wt_col='wt' )
coefs = get_coefs(cur_std_pred_cols, clf.coef_)
mn = coefs.val.min()
if mn >= 0:
df, bst_p, bst_r = ku.get_bst_params( model, df, X_new, y, target_col, col, verbose = True, wt_col='wt', params = [bst_p] )
coefs.to_csv('./data/coefs/{}_{}_{}.csv'.format(collection, model, it), index=False)
test = ku.apply_model( model, bst_p, df, test, cur_std_pred_cols, target_col, col)
if model in ['rfr']:
@@ -543,11 +567,12 @@ def train_model(check_exclude, supplement_with_listings):
clf = LinearRegression(fit_intercept=False)
target_col = 'adj_price'
clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values )
clf.score( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values )
df[['pred_lin','pred_log',target_col]].mean()
df[['pred_lin','pred_log',target_col]].median()
test[['pred_lin','pred_log']].mean()
test[['pred_lin','pred_log']].median()
score = clf.score( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values )
print('R-Sq: {}'.format(round(score * 100, 1)))
# df[['pred_lin','pred_log',target_col]].mean()
# df[['pred_lin','pred_log',target_col]].median()
# test[['pred_lin','pred_log']].mean()
# test[['pred_lin','pred_log']].median()
print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) ))
tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], CUR_FLOOR]], columns=['collection','lin_coef','log_coef','floor_price'])
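# final price is an intercept-free weighted blend of the linear- and log-target predictions; the two coefficients are saved per collection alongside the current floor price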
@@ -591,7 +616,8 @@ def train_model(check_exclude, supplement_with_listings):
df[df.pred < 200].err.mean()
df['collection'] = collection
print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2)))
salesdf = salesdf.append( df.rename(columns={'collection_rank':'nft_rank'}).merge(s_df[s_df.sim == 0][['collection','token_id','block_timestamp','price']] )[[ 'collection','token_id','block_timestamp','price','pred','mn_20','nft_rank' ]].sort_values('block_timestamp', ascending=0) )
# salesdf = salesdf.append( df.rename(columns={'collection_rank':'nft_rank'}).merge(s_df[s_df.sim == 0][['collection','token_id','block_timestamp','price']] )[[ 'collection','token_id','block_timestamp','price','pred','mn_20','nft_rank' ]].sort_values('block_timestamp', ascending=0) )
salesdf = salesdf.append( df.merge(s_df[s_df.sim == 0][['collection','token_id','block_timestamp','price']] )[[ 'collection','token_id','block_timestamp','price','pred','mn_20','nft_rank' ]].sort_values('block_timestamp', ascending=0) )
############################################################
# Create Predictions for Each NFT in The Collection #
@@ -703,6 +729,9 @@ def train_model(check_exclude, supplement_with_listings):
# nft_rank['token_id'] = nft_rank.token_id.astype(str)
# pred_price['token_id'] = pred_price.token_id.astype(str)
# pred_price = pred_price.merge(nft_rank, how='left', on=['collection','token_id'])
# pred_price = pred_price[pred_price.collection != 'LunaBulls']
pred_price['collection'] = pred_price.collection.apply(lambda x: clean_name(x))
pred_price = pred_price.drop_duplicates(subset=['collection','token_id'], keep='last')
pred_price.to_csv('./data/pred_price.csv', index=False)
# pred_price = pd.read_csv('./data/pred_price.csv')
pred_price.groupby('collection')[['pred_price']].min()
@@ -725,14 +754,17 @@ def train_model(check_exclude, supplement_with_listings):
feature_values.to_csv('./data/feature_values.csv', index=False)
file_to_store = open('./objects/saved_params.pickle', 'wb')
pickle.dump(saved_params, file_to_store)
if True or check_exclude:
exclude = pd.read_csv('./data/exclude.csv')
salesdf['rat'] = salesdf.price / salesdf.pred
salesdf['dff'] = salesdf.price - salesdf.pred
salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2.5)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int)
salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2.5)) | ((salesdf.dff >= 80) & (salesdf.rat > 2.5))).astype(int)
salesdf['rat'] = salesdf.pred / salesdf.price
salesdf['dff'] = salesdf.pred - salesdf.price
salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2.5)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int)
salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2.5)) | ((salesdf.dff >= 80) & (salesdf.rat > 2.5))).astype(int)
salesdf['exclude'] = (salesdf.exclude_1 + salesdf.exclude_2).apply(lambda x: int(x>0))
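# outlier filter: a sale is excluded when price and prediction disagree by BOTH an absolute gap (20/40/60/80) and a ratio (4x/3x/2.5x/2.5x), checked symmetrically in both directions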
print(salesdf.exclude_1.mean())
print(salesdf.exclude_2.mean())
@@ -744,5 +776,5 @@ def train_model(check_exclude, supplement_with_listings):
# train_model(True, False)
# train_model(False, False)
# train_model(False, True)
train_model(False, True)

View File

@@ -5,6 +5,8 @@ from time import sleep
from copy import deepcopy
import random
from utils import clean_name
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
os.environ['PATH'] += os.pathsep + '/Users/kellenblumberg/shared/'
@@ -14,8 +16,6 @@ import solana_model as sm
browser = webdriver.Chrome()
if False:
alerted = []
for i in range(10):
@@ -50,6 +50,7 @@ if False:
ssn.scrape_recent_smb_sales(browser)
ssn.scrape_recent_sales()
ld.add_terra_sales()
ld.add_solana_sales()
# update listings
ssn.scrape_randomearth(browser)
@@ -93,24 +94,29 @@ def add_model_sales():
def update_token_ids():
tokens = pd.read_csv('./data/tokens.csv')
tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x))
tokens = tokens.drop_duplicates(subset=['collection','token_id'], keep='last')
tokens.to_csv('./data/tokens.csv', index=False)
tokens.groupby('collection').token_id.count()
tokens['tmp'] = tokens.token_id.apply(lambda x: (int(float(x))) )
tokens[tokens.token_id == 223838831896070003935953339589523931136]
tokens[tokens.collection=='Galactic Punks']
tokens['token_id'] = tokens.token_id.apply(lambda x: str(int(float(x))) )
tokens['tmp'] = tokens.token_id.apply(lambda x: len(x) )
# tokens['tmp'] = tokens.token_id.apply(lambda x: len(x) )
tokens.tmp.max()
# df[ (df.collection == 'Pesky Penguins') & (df.token_id == '3362') ]
tokens[ (tokens.collection == 'Pesky Penguins') & (tokens.token_id == '3362') ]
tokens[ (tokens.collection == 'Pesky Penguins') & (tokens.token_id == 3362) ]
# df.token_id.unique()
c = 'listings'
c = 'sales'
# for c in [ 'listings' ]:
for c in [ 'attributes','sales','listings' ]:
print(c)
df = pd.read_csv('./data/{}.csv'.format(c))
df['collection'] = df.collection.apply(lambda x: clean_name(x))
# df.token_id.unique()
df = df[df.token_id.notnull()]
df['token_id'] = df.token_id.apply(lambda x: None if x == 'nan' else str(int(float(x))) )
# df['token_id'] = df.token_id.apply(lambda x: None if x == 'nan' else str(int(float(x))) )
df['token_id'] = df.token_id.apply(lambda x: None if x == 'nan' else str(int(float(x))) )
# df['tmp'] = df.token_id.apply(lambda x: (str(x)[:5]))
df['tmp'] = df.token_id.apply(lambda x: x[:10] )

View File

@@ -1,3 +1,4 @@
import re
import pandas as pd
@@ -10,11 +11,27 @@ clean_names = {
,'meerkatmillionaires': 'Meerkat Millionaires'
,'boryokudragonz': 'Boryoku Dragonz'
,'degods': 'DeGods'
,'lunabulls': 'LunaBulls'
# ,'stonedapecrew': 'Stoned Ape Crew'
}
def clean_token_id(df):
tokens = pd.read_csv('./data/tokens.csv')
df['collection'] = df.collection.apply(lambda x: clean_name(x))
df['token_id'] = df.token_id.apply(lambda x: re.sub('"', '', x) if type(x)==str else x )
df['tmp'] = df.token_id.apply(lambda x: x[:10] )
tokens['tmp'] = tokens.token_id.apply(lambda x: x[:10] )
df = df.merge(tokens[['collection','tmp','clean_token_id']], how='left', on=['collection','tmp'])
df['token_id'] = df.clean_token_id.fillna(df.token_id)
df['token_id'] = df.token_id.astype(int)
del df['tmp']
del df['clean_token_id']
return(df)
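# the 10-char prefix match guards against long token ids (e.g. the 39-digit Galactic Punks id seen earlier in this diff) that lose precision when round-tripped through float; matching a stable prefix against the canonical tokens.csv recovers the exact clean_token_id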
def clean_name(name):
if name.lower() in clean_names.keys():
return(clean_names[name.lower()])
x = re.sub('-', '', name).lower()
if x in clean_names.keys():
return(clean_names[x])
name = re.sub('-', ' ', name.title())
return(name)
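# e.g. clean_name('degods') -> 'DeGods' via the lookup table; clean_name('stoned-ape-crew') -> 'Stoned Ape Crew' via the title-case fallback (the stonedapecrew mapping above is commented out)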
@@ -27,7 +44,7 @@ def merge(left, right, on=None, how='inner', ensure=True, verbose=True, message
print('{} -> {}'.format(len(left), len(df)))
cur = left.merge(right, on=on, how='left')
cols = set(right.columns).difference(set(left.columns))
print(cols)
# print(cols)
if ensure:
col = list(cols)[0]
missing = cur[cur[col].isnull()]

View File

@@ -408,7 +408,7 @@ server <- function(input, output, session) {
selectInput(
inputId = 'collectionname'
, label = NULL
, selected = 'DeGods'
, selected = 'Stoned Ape Crew'
, choices = choices
, width = "100%"
)
@@ -454,10 +454,14 @@ server <- function(input, output, session) {
, ifelse(
selected == 'Solana Monkey Business'
, selected
, ifelse(
selected == 'Stoned Ape Crew'
, 'Stoned Ape'
, substr(selected, 1, nchar(selected) - 1)
)
)
)
)
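# title logic: 'Solana Monkey Business' is kept whole, 'Stoned Ape Crew' becomes 'Stoned Ape', and any other collection drops its final character (assumed to be a plural 's') before the token id is appended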
if (!is.na(id)) {
t <- paste0(title," #", id)
}