import os
import random
from copy import deepcopy
from time import sleep

import pandas as pd
from selenium import webdriver

# Run from the project root so the relative ./data paths below resolve, and
# put the shared tools directory (presumably where chromedriver lives) on PATH.
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
os.environ['PATH'] += os.pathsep + '/Users/kellenblumberg/shared/'

# Project-local helpers.
import scrape_sol_nfts as ssn
import load_data as ld
import solana_model as sm
from utils import clean_name

# One shared Selenium Chrome session for every scraper below.
browser = webdriver.Chrome()
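# A headless session is a possible swap here (an assumption, not the original
# setup; some marketplaces block headless browsers):
# opts = webdriver.ChromeOptions()
# opts.add_argument('--headless')
# browser = webdriver.Chrome(options=opts)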

# Optional deal-alert loop: re-scrape RandomEarth, rebuild token ids, and
# re-score Terra listings every 10 minutes. Disabled by default; flip the
# guard to run it (after the function definitions below have been executed).
if False:
    alerted = []
    for i in range(10):
        ssn.scrape_randomearth(browser)
        update_token_ids()
        listings = pd.read_csv('./data/listings.csv')
        listings = listings[listings.chain == 'Terra']
        listings.collection.unique()  # interactive spot check
        alerted = ssn.calculate_deal_scores(listings, alerted)
        sleep(10 * 60)
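# Hardening sketch (an addition, not in the original): one Selenium error
# aborts the loop as written; each scrape could be guarded instead:
#     try:
#         ssn.scrape_randomearth(browser)
#     except Exception as err:
#         print('scrape failed; retrying next cycle:', err)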

# One-off analysis: top-10-ranked sales vs. floor prices at the time (kept
# for reference).
# sales = pd.read_csv('./data/sales.csv')
# pred_price = pd.read_csv('./data/pred_price.csv').sort_values('token_id')
# pred_price['rank'] = pred_price.groupby('collection').pred_price.rank(ascending=0)
# sales = sales.merge(pred_price[['collection','token_id','rank']])
# sales = sales[ sales.collection.isin(['Solana Monkey Business','Degen Apes','Aurory','Pesky Penguins','Thugbirdz']) ]
# sales = sales[sales['rank']<=10].sort_values('price', ascending=0)
# # sales = sales.sort_values('price', ascending=0).groupby('collection').head(3)[['collection','sale_date','token_id','price','rank']].sort_values('collection')
# d = {  # floors at time of analysis; no 'Pesky Penguins' entry, so d[x] would KeyError on it
#     'Solana Monkey Business': 140,
#     'Aurory': 18.5,
#     'Degen Apes': 34,
#     'Thugbirdz': 40,
# }
# sales['current_floor'] = sales.collection.apply(lambda x: d[x] )
# sales['floor_ratio'] = sales.price / sales.current_floor
# sales.to_csv('~/Downloads/tmp.csv', index=False)

# update sales
# ssn.scrape_recent_smb_sales(browser)
# ssn.scrape_recent_sales()
ld.add_terra_sales()
ld.add_solana_sales()
ld.add_eth_sales()

# update listings
# ssn.scrape_listings(browser, ['meerkat-millionaires-cc'])
# ssn.scrape_listings(browser, ['catalina-whale-mixer'])
ssn.scrape_listings(browser)
ssn.scrape_randomearth(browser)
# ssn.scrape_opensea_listings(browser)
# ssn.scrape_listings(browser, ['smb','aurory'])

# Spot-check the refreshed listings (interactive):
# listings = pd.read_csv('./data/listings.csv')
# listings[listings.collection == 'Solana Monkey Business'].sort_values('price').head(20)

# update model
# ssn.convert_collection_names()
# sm.train_model(True, False)
# sm.train_model(False, False)

# Quick range checks (interactive):
# sales = pd.read_csv('./data/sales.csv')
# listings = pd.read_csv('./data/listings.csv')
# listings.price.max()
# sales.price.max()

def add_model_sales():
    """Join sales to predicted-price ranks and rewrite ./data/model_sales.csv."""
    sales = pd.read_csv('./data/sales.csv').rename(columns={'sale_date': 'block_timestamp'})
    print(sales.groupby('collection').token_id.count())
    sales.token_id.unique()                       # interactive spot checks
    sales.groupby('collection').token_id.count()
    sales[sales.collection == 'Galactic Punks']
    del sales['tx_id']

    # Predicted prices/ranks per token.
    # old = pd.read_csv('./data/pred_price.csv').rename(columns={'rank': 'nft_rank'})
    old = pd.read_csv('./data/pred_price copy.csv').rename(columns={'rank': 'nft_rank'})
    old.groupby('collection').token_id.count()

    # Normalize token_id to string on both sides before merging.
    sales['token_id'] = sales.token_id.astype(int).astype(str)
    old['token_id'] = old.token_id.astype(str)
    sales = sales.merge(old[['collection', 'token_id', 'nft_rank']])
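    # merge() defaults to an inner join, so sales whose (collection, token_id)
    # never received a predicted rank are dropped here.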
    sales.head()

    sales['block_timestamp'] = sales.block_timestamp.apply(lambda x: str(x)[:19])  # trim to second precision
    sales['price'] = sales.price.apply(lambda x: round(x, 2))
    print(sales.groupby('collection').token_id.count())
    sales.to_csv('./data/model_sales.csv', index=False)

    # Re-read and dedupe; note this also collapses genuine repeat sales of the
    # same token at the same price into a single row.
    sales = pd.read_csv('./data/model_sales.csv')
    print(len(sales))
    sales = sales.drop_duplicates(subset=['collection', 'token_id', 'price'])
    print(len(sales))
    sales.to_csv('./data/model_sales.csv', index=False)


def update_token_ids():
    """Normalize collection names and token ids in tokens.csv, then propagate
    the cleaned ids to the attributes/sales/listings CSVs."""
    tokens = pd.read_csv('./data/tokens.csv')
    tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x))
    tokens = tokens.drop_duplicates(subset=['collection', 'token_id'], keep='last')
    # Written back before token_id is rounded below, so tokens.csv keeps the
    # raw ids.
    tokens.to_csv('./data/tokens.csv', index=False)

    tokens.groupby('collection').token_id.count()  # interactive spot checks
    tokens['tmp'] = tokens.token_id.apply(lambda x: int(float(x)))
    tokens[tokens.token_id == 223838831896070003935953339589523931136]
    tokens[tokens.collection == 'Galactic Punks']
    tokens['token_id'] = tokens.token_id.apply(lambda x: str(int(float(x))))
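    # Terra token ids are ~39-digit integers, so the float round-trip above is
    # only stable in the leading ~15 digits (compare the two near-identical
    # Galactic Punks ids in the notes below). The 10-character prefix key used
    # for the joins below sidesteps that by matching on leading digits only.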
    # tokens['tmp'] = tokens.token_id.apply(lambda x: len(x) )
    tokens.tmp.max()
    # df[ (df.collection == 'Pesky Penguins') & (df.token_id == '3362') ]
    tokens[(tokens.collection == 'Pesky Penguins') & (tokens.token_id == '3362')]
    tokens[(tokens.collection == 'Pesky Penguins') & (tokens.token_id == 3362)]
    # df.token_id.unique()

    # Rebuild tmp as a string prefix so it matches df['tmp'] in the loop below
    # (the integer tmp above cannot merge against a string key).
    tokens['tmp'] = tokens.token_id.apply(lambda x: x[:10])

    c = 'sales'  # leftover from stepping through the loop one table at a time
    # for c in [ 'listings' ]:
    for c in ['attributes', 'sales', 'listings']:
        print(c)
        df = pd.read_csv('./data/{}.csv'.format(c))
        df['collection'] = df.collection.apply(lambda x: clean_name(x))
        # df.token_id.unique()
        df = df[df.token_id.notnull()]
        df['token_id'] = df.token_id.apply(lambda x: None if x == 'nan' else str(int(float(x))))
        # df['tmp'] = df.token_id.apply(lambda x: (str(x)[:5]))
        df['tmp'] = df.token_id.apply(lambda x: x[:10])  # same prefix key as tokens['tmp']
        # len(tokens)
        # len(tokens[['collection','token_id']].drop_duplicates())
        # len(tokens[['collection','tmp']].drop_duplicates())
        # df.to_csv('~/Downloads/tmp2.csv', index=False)
        if 'clean_token_id' in df.columns:
            del df['clean_token_id']

        # Galactic Punks debugging notes:
        # tokens[tokens.collection=='Galactic Punks']
        # len(tokens[tokens.collection=='Galactic Punks'])
        # tokens[(tokens.collection=='Galactic Punks') & (tokens.token_id=='25984997114855597728010029317878710272')]
        # 25984997114855639851202718743284654443
        # 25984997114855597728010029317878710272
        # a = set(tokens[tokens.collection=='Galactic Punks'].token_id.unique())
        # b = set(df[df.collection=='Galactic Punks'].token_id.unique())
        # len(a.intersection(b))
        # [ x for x in a if x in b ]
        # len([ x for x in a if x in b ])
        # df[(df.collection=='Galactic Punks')].token_id.values[0]
        df = df.merge(tokens[['collection', 'tmp', 'clean_token_id']], how='left', on=['collection', 'tmp'])
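        # how='left' keeps rows with no (collection, prefix) match; their
        # clean_token_id comes back NaN and is backfilled from token_id below.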
        # df[df.collection == 'Galactic Punks'].sort_values('clean_token_id')
        # print(df[ (df.clean_token_id.isnull()) & ( df.collection == 'Galactic Punks')])
        # print(len(df[ (df.clean_token_id.isnull()) & ( df.chain == 'Terra')]))
        # print(len(df[ (df.clean_token_id.isnull())]))
        # print(df[ (df.clean_token_id.isnull())].groupby('collection').token_id.count() )
        # print(df[ (df.clean_token_id.notnull())].groupby('collection').token_id.count() )
        # print(len(df[ (df.clean_token_id.notnull()) & ( df.collection == 'Galactic Punks')]))
        # min(df[df.collection == 'Galactic Punks'].token_id.values)
        # min(tokens[tokens.collection == 'Galactic Punks'].token_id.values)
        df['clean_token_id'] = df.clean_token_id.fillna(df.token_id).astype(float).astype(int).astype(str)
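        # The float cast re-applies the ~15-digit precision limit to the
        # backfilled ids; that is tolerable only because token_id went through
        # the same rounding when it was normalized above.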
        # print(df[ (df.token_id.isnull()) & ( df.collection == 'Galactic Punks')])
        df[df.clean_token_id.isnull()].groupby('collection').token_id.count()   # interactive spot checks
        df[df.clean_token_id.notnull()].groupby('collection').token_id.count()
        df['token_id'] = df.clean_token_id
        del df['clean_token_id']
        df[df.collection == 'Galactic Punks']
        print(df.groupby('collection').token_id.count())
        df.to_csv('./data/{}.csv'.format(c), index=False)


# update_token_ids()
# add_model_sales()
sm.train_model()
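# Typical full refresh (uncomment as needed): update_token_ids() rebuilds the
# normalized ids, add_model_sales() rebuilds model_sales.csv, then
# sm.train_model() retrains.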

# Post-run inspection / backfill (interactive; flip the guard to run):
if False:
    listings = pd.read_csv('./data/listings.csv')
    tokens = pd.read_csv('./data/tokens.csv')
    tokens['clean_token_id'] = tokens.clean_token_id.fillna(tokens.token_id)
    tokens[tokens.collection == 'Okay Bears']
    tokens[tokens.collection == 'Okay Bears'].image_url.values[0]
    tokens['chain'] = tokens.chain.fillna('Solana')  # assume unlabeled rows are Solana
    tokens.to_csv('./data/tokens.csv', index=False)
    listings[listings.chain.isnull()]
    listings[listings.collection == 'Okay Bears']