Merge pull request #43 from FlipsideCrypto/kellen-updates-2022-02-03

Kellen updates 2022 02 03
flipside-kellen 2022-07-14 15:19:06 -07:00 committed by GitHub
commit af33e78dd3
24 changed files with 8326 additions and 928 deletions

View File

@ -9,6 +9,7 @@ sudo cp ~/nft-deal-score/viz/nft_deal_score_data.RData /srv/shiny-server/nft-dea
sudo cp ~/nft_deal_score_listings_data.RData /rstudio-data
sudo cp ~/nft_deal_score_sales_data.RData /rstudio-data
sudo cp ~/nft_deal_score_sales.csv /rstudio-data
sudo cp ~/nft_deal_score_data.RData /rstudio-data
sudo cp ~/nft_deal_score_listings.csv /rstudio-data
sudo cp ~/nft_deal_score_sales.csv /rstudio-data

View File

@ -27,7 +27,7 @@ attributes[, feature_name := trimws(feature_name) ]
attributes[, feature_value := trimws(as.character(feature_value)) ]
feature_values <- read_csv('feature_values.csv')
sales <- read_csv('model_sales.csv')
listings <- read_csv('listings.csv')
listings <- read.csv('/Users/kellenblumberg/git/nft-deal-score/viz/nft_deal_score_listings.csv') %>% as.data.table()
coefsdf <- read_csv('coefsdf.csv')
tokens <- read_csv('tokens.csv')
tokens[, token_id := clean_token_id]
@ -39,18 +39,25 @@ listings <- listings[ !(collection == 'Solana Monkey Business' & token_id == 953
tokens[, token_id := as.numeric(token_id)]
# manual adjustments to price
ids_1 <- attributes[ (collection == 'Aurory') & (feature_value == 'Solana Blob') ]$token_id
pred_price[ collection == 'Aurory' & token_id %in% eval(ids_1), pred_price := (pred_price * 0.8) ]
# ids_1 <- attributes[ (collection == 'Aurory') & (feature_value == 'Solana Blob') ]$token_id
# pred_price[ collection == 'Aurory' & token_id %in% eval(ids_1), pred_price := (pred_price * 0.8) ]
ids_2 <- attributes[ (collection == 'Aurory') & (feature_value == 'Long Blob Hair ') ]$token_id
pred_price[ collection == 'Aurory' & token_id %in% eval(ids_2), pred_price := (pred_price * 0.90) ]
# ids_2 <- attributes[ (collection == 'Aurory') & (feature_value == 'Long Blob Hair ') ]$token_id
# pred_price[ collection == 'Aurory' & token_id %in% eval(ids_2), pred_price := (pred_price * 0.90) ]
ids_3 <- attributes[ (collection == 'Aurory') & (grepl( 'Mask', feature_value, fixed = TRUE)) ]$token_id
pred_price[ collection == 'Aurory' & token_id %in% eval(ids_3), pred_price := (pred_price * 0.975) ]
# ids_3 <- attributes[ (collection == 'Aurory') & (grepl( 'Mask', feature_value, fixed = TRUE)) ]$token_id
# pred_price[ collection == 'Aurory' & token_id %in% eval(ids_3), pred_price := (pred_price * 0.975) ]
sales[collection == 'Cets On Creck', collection := 'Cets on Creck']
pred_price[collection == 'Cets On Creck', collection := 'Cets on Creck']
# sales[collection == 'Cets On Creck', collection := 'Cets on Creck']
# pred_price[collection == 'Cets On Creck', collection := 'Cets on Creck']
listings[collection == 'Cets On Creck', collection := 'Cets on Creck']
cols <- c( 'Citizens By Solsteads' )
# sales[, tmp := tolower(coll)]
for (col in cols) {
sales[ tolower(collection) == eval(tolower(col)), collection := eval(col) ]
pred_price[ tolower(collection) == eval(tolower(col)), collection := eval(col) ]
listings[ tolower(collection) == eval(tolower(col)), collection := eval(col) ]
}
sort(unique(listings$collection))
@ -58,10 +65,13 @@ sort(unique(pred_price$collection))
sort(unique(sales$collection))
# filter for only collections that have all data
a <- unique(pred_price[, list(collection)])
b <- unique(sales[, list(collection)])
c <- unique(listings[, list(collection)])
a <- unique(pred_price[, list(collection)][order(collection)])
b <- unique(sales[, list(collection)][order(collection)])
c <- unique(listings[, list(collection)][order(collection)])
d <- merge(merge(a, b), c)
d <- d[order(collection)]
d <- d[ collection %in% c('Aurory','Bubblegoose Ballers','Catalina Whale Mixer','Cets on Creck','DeGods','Degen Apes','Famous Fox Federation','Meerkat Millionaires','Okay Bears','Pesky Penguins','Primates','SOLGods','Solana Monkey Business','Stoned Ape Crew','ThugbirdzcMAYC') ]
write.csv(d, '~/Downloads/tmp.csv', row.names=F)
pred_price <- merge(pred_price, d, by=c('collection'))
attributes <- merge(attributes, d, by=c('collection'))
@ -91,6 +101,7 @@ save(
, tokens
, file = paste0(file.location,'nft_deal_score_data.Rdata')
)
# save(
# listings
# , file = paste0(file.location,'nft_deal_score_listings_data.Rdata')

View File

@ -4,6 +4,7 @@ import os
import math
import json
from typing import Collection
from nbformat import write
import pandas as pd
import snowflake.connector
@ -364,15 +365,37 @@ def solana():
query = '''
SELECT DISTINCT project_name
FROM solana.dim_nft_metadata
'''
seen = ctx.cursor().execute(query)
seen = pd.DataFrame.from_records(iter(seen), columns=[x[0] for x in seen.description])
seen = clean_colnames(seen)
seen = list(seen.project_name.values)
seen = [ x.lower() for x in seen ]
metadata = pd.read_csv('./data/metadata.csv')
len(metadata)
# print(sorted(metadata.collection.unique()))
# metadata = metadata[metadata.collection == collection]
# print(sorted(metadata.collection.unique()))
metadata = metadata[-(metadata.feature_name.isin(['adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2','nft_rank']))]
metadata[['collection']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
len(metadata.token_id.unique())
id_map = pd.read_csv('./data/mint_to_token_id_map.csv')
# id_map = pd.read_csv('./data/mint_to_token_id_map.csv')
id_map = pd.read_csv('./data/tokens.csv')
cs = ['Stoned Ape Crew']
tokens = pd.read_csv('./data/tokens.csv')
tokens.collection.unique()
len(tokens.collection.unique())
cs = [ x for x in id_map.collection.unique() if not x.lower() in seen ]
len(id_map.collection.unique())
len(cs)
id_map = id_map[id_map.collection.isin(cs)]
metadata = metadata[metadata.collection.isin(cs)]
# cs = metadata[metadata.chain.fillna('Solana') == 'Solana'].collection.unique()
cs = metadata.collection.unique()
id_map = id_map[id_map.collection.isin(cs)]
metadata = metadata[metadata.collection.isin(cs)]
sorted(id_map.collection.unique())
@ -399,51 +422,180 @@ def solana():
# sorted(metadata.feature_name.unique())
# metadata[['collection']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
# Python code to convert into dictionary
def Convert(tup, di):
di = dict(tup)
return di
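# Illustration (not in the original script): Convert turns (feature_name, feature_value)
# tuple pairs into a metadata dict; the second argument exists only to match the call
# sites below and is immediately overwritten.
# e.g. Convert([('Background', 'Blue'), ('Fur', 'Gold')], {}) -> {'Background': 'Blue', 'Fur': 'Gold'}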
metadata = metadata[-metadata.collection.isin(['LunaBulls', 'Levana Dragon Eggs'])]
metadata['token_id'] = metadata.token_id.astype(float)
metadata['token_id'] = metadata.token_id.astype(int)
metadata.groupby(['collection','feature_name']).token_id.count()
metadata.head()
metadata[metadata.mint_address.isnull()].collection.unique()
assert(len(metadata[metadata.mint_address.isnull()]) == 0)
for collection in metadata.collection.unique():
print(collection)
dirs = sorted(list(set(os.listdir('./data/metadata/')).intersection(set(metadata.collection.unique()))))
sorted(list(metadata.collection.unique()))
# collection = 'Bubblegoose Ballers'
it = 0
tot = len(metadata.collection.unique())
data = []
for collection in metadata.collection.unique()[:1]:
print('#{} / {}: {}'.format(it, tot, collection))
mdf = metadata[metadata.collection == collection]
results = []
for token_id in sorted(mdf.token_id.unique()):
if token_id % 1000 == 1:
print(token_id, len(results))
cur = mdf[mdf.token_id == token_id]
token_metadata = {}
# m = mints[(mints.collection == collection) & (mints.token_id == token_id) ]
m = metadata[(metadata.collection == collection) & (metadata.token_id == token_id) ]
m = m.fillna('None')
if not len(m):
print(token_id)
continue
# mint_address = m.mint_address.values[0] if 'mint_address' in m.columns else ''
mint_address = m.mint_address.values[0]
for row in cur.iterrows():
row = row[1]
token_metadata[row['feature_name']] = row['feature_value']
df.groupby('Column1')[['Column2', 'Column3']].apply(lambda g: g.values.tolist()).to_dict()
mdf.head(20).groupby(['collection','image_url','token_id'])[[ 'feature_name','feature_value' ]].apply(lambda g: g.values.tolist()).to_dict()
d = {
'commission_rate': None
, 'mint_address': mint_address
, 'token_id': token_id
, 'contract_address': mint_address
, 'contract_name': row['collection']
, 'created_at_block_id': 0
, 'created_at_timestamp': str('2021-01-01')
, 'created_at_tx_id': ''
, 'creator_address': mint_address
, 'creator_name': row['collection']
, 'image_url': 'None'
, 'project_name': row['collection']
, 'token_id': int(token_id)
, 'token_metadata': token_metadata
, 'token_metadata_uri': row['image_url']
, 'token_name': row['collection']
mdf.head(20).groupby(['collection','image_url','token_id'])[[ 'feature_name','feature_value' ]].apply(lambda g: list(map(tuple, g.values.tolist())) ).to_dict()
mdf.head(20).groupby(['collection','image_url','token_id'])[[ 'feature_name','feature_value' ]].apply(lambda g: Convert(list(map(tuple, g.values.tolist())), {}) ).to_dict()
a = mdf.head(20).groupby(['collection','mint_address','token_id','image_url'])[[ 'feature_name','feature_value' ]].apply(lambda g: Convert(list(map(tuple, g.values.tolist())), {}) ).reset_index()
a = metadata.groupby(['collection','mint_address','token_id','image_url'])[[ 'feature_name','feature_value' ]].apply(lambda g: Convert(list(map(tuple, g.values.tolist())), {}) ).reset_index()
a.columns = ['collection','mint_address','token_id','image_url', 'token_metadata']
a['commission_rate'] = None
a['contract_address'] = a.mint_address
a['contract_name'] = a.collection
a['created_at_block_id'] = 0
a['created_at_timestamp'] = '2021-01-01'
a['created_at_tx_id'] = ''
a['creator_address'] = a.mint_address
a['creator_name'] = a.collection
a['project_name'] = a.collection
a['token_metadata_uri'] = a.image_url
a['token_name'] = a.collection
a.to_csv('./data/metadata/results.csv', index=False)
a['n'] = range(len(a))
a['n'] = a.n.apply(lambda x: int(x/50) )
a['token_id'] = a.token_id.astype(int)
cols = ['collection', 'mint_address', 'token_id', 'image_url', 'token_metadata',
'commission_rate', 'contract_address', 'contract_name',
'created_at_block_id', 'created_at_timestamp', 'created_at_tx_id',
'creator_address', 'creator_name', 'project_name', 'token_metadata_uri',
'token_name']
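# Batching sketch (assumption about intent): the rows are first split into files of
# 100,000 records (n below), and inside each file the "results" arrays hold 50 records
# each, so one 100,000-row slice yields 2,000 payload objects.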
n = 100000
tot = int(len(a) / n) + 1
for i in range(0, len(a), n):
ind = int(i/n)
print('#{} / {}'.format(ind, tot))
g = a.head(i+n).tail(n).to_dict('records')
txt = [
{
"model": {
"blockchain": "solana",
"sinks": [
{
"destination": "{database_name}.silver.nft_metadata",
"type": "snowflake",
"unique_key": "blockchain || contract_address || token_id"
}
],
},
"results": g[x:x+50]
}
for x in range(0, len(g), 50)
]
w = pd.DataFrame({'ind': range(len(txt)), 'results':[json.dumps(x) for x in txt] })
# w['results'] = w.results.apply(lambda x: x[1:-1] )
w.to_csv('./data/metadata/results/{}.csv'.format(ind), index=False)
# with open('./data/metadata/results/{}.json'.format(i), 'w') as outfile:
# json.dump(results[i:i+100000], outfile)
g = a.head(200).groupby('n')[cols].apply(lambda g: Convert(list(map(tuple, g.values.tolist())), {}) ).to_dict()
g = a.head(200).groupby('n')[cols].apply(lambda g: (list(map(tuple, g.values.tolist())), {}) )
g = a.head(200).groupby('n')[cols].apply(lambda g: g.values.tolist()).reset_index()
g = a.head(200).to_dict('records')
sorted(a.collection.unique())
g = a[a.collection == 'Jungle Cats'].head(20000).to_dict('records')
txt = [
{
"model": {
"blockchain": "solana",
"sinks": [
{
"destination": "{database_name}.silver.nft_metadata",
"type": "snowflake",
"unique_key": "blockchain || contract_address || token_id"
}
],
},
"results": g[i:i+50]
}
results.append(d)
for i in range(0, len(g), 50)
]
w = pd.DataFrame({'ind': range(len(txt)), 'results':[json.dumps(x) for x in txt] })
# w['results'] = w.results.apply(lambda x: x[1:-1] )
w.to_csv('./data/metadata/results.csv', index=False)
with open('./data/metadata/results.txt', 'w') as outfile:
outfile.write(json.dumps(txt))
g = list(a.head(200).values)
results = a.to_dict('records')
for i in range(0, len(results), 100000):
print(i)
with open('./data/metadata/results/{}.json'.format(i), 'w') as outfile:
json.dump(results[i:i+100000], outfile)
n = 50
r = math.ceil(len(results) / n)
for i in range(r):
print('#{} / {}'.format(i, r))
newd = {
"model": {
"blockchain": "solana",
"sinks": [
{
"destination": "{database_name}.silver.nft_metadata",
"type": "snowflake",
"unique_key": "blockchain || contract_address || token_id"
}
],
},
"results": results[(i * n):((i * n)+r)]
}
data += [ json.dumps(newd) ]
with open('./data/metadata/results/{}.txt'.format(collection, i), 'w') as outfile:
outfile.write(json.dumps(newd))
# results = []
# for token_id in sorted(mdf.token_id.unique()):
# if token_id % 1000 == 1:
# print(token_id, len(results))
# cur = mdf[mdf.token_id == token_id]
# token_metadata = {}
# # m = mints[(mints.collection == collection) & (mints.token_id == token_id) ]
# m = metadata[(metadata.collection == collection) & (metadata.token_id == token_id) ]
# m = m.fillna('None')
# if not len(m):
# print(token_id)
# continue
# # mint_address = m.mint_address.values[0] if 'mint_address' in m.columns else ''
# mint_address = m.mint_address.values[0]
# for row in cur.iterrows():
# row = row[1]
# token_metadata[row['feature_name']] = row['feature_value']
# d = {
# 'commission_rate': None
# , 'mint_address': mint_address
# , 'token_id': token_id
# , 'contract_address': mint_address
# , 'contract_name': row['collection']
# , 'created_at_block_id': 0
# , 'created_at_timestamp': str('2021-01-01')
# , 'created_at_tx_id': ''
# , 'creator_address': mint_address
# , 'creator_name': row['collection']
# , 'image_url': row['image_url']
# , 'project_name': row['collection']
# , 'token_id': int(token_id)
# , 'token_metadata': token_metadata
# , 'token_metadata_uri': row['image_url']
# , 'token_name': row['collection']
# }
# results.append(d)
print('Uploading {} results'.format(len(results)))
dir = './data/metadata/{}/'.format(collection)
@ -466,6 +618,7 @@ def solana():
},
"results": results[(i * n):((i * n)+r)]
}
data += [ json.dumps(newd) ]
with open('./data/metadata/{}/{}.txt'.format(collection, i), 'w') as outfile:
outfile.write(json.dumps(newd))

View File

@ -1,9 +1,8 @@
import collections
import re
import os
import json
import time
import math
from tkinter import SEL
import requests
import pandas as pd
import urllib.request
@ -11,11 +10,24 @@ import snowflake.connector
from bs4 import BeautifulSoup
from time import sleep
import cloudscraper
from theblockchainapi import SolanaAPIResource, SolanaNetwork, SearchMethod
# Get an API key pair for free here: https://dashboard.blockchainapi.com/api-keys
MY_API_KEY_ID = 'sLbjx8YFYdTtUuH'
MY_API_SECRET_KEY = 'p24pFaM9lLbWscN'
BLOCKCHAIN_API_RESOURCE = SolanaAPIResource(
api_key_id=MY_API_KEY_ID,
api_secret_key=MY_API_SECRET_KEY
)
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
from solana_model import just_float
from utils import clean_name, clean_token_id, format_num
from utils import clean_name, clean_token_id, format_num, merge
#########################
# Connect to DB #
@ -100,6 +112,19 @@ def add_collection_steps():
# 5. run model
pass
def create_upload_file():
cols = [ 'collection','mint_address' ]
a = pd.read_csv('./data/mints-2022-06-13-2pm.csv')[cols]
b = pd.read_csv('~/Downloads/manual_labels.csv')
b.columns = cols
c = pd.read_csv('~/Downloads/solscan_collections.csv')[cols]
d = pd.read_csv('./data/tokens.csv')[cols]
df = pd.concat([a, b, c, d]).drop_duplicates(subset=['mint_address'], keep='last')
df.to_csv('~/Downloads/mints-2022-06-13-5pm.csv', index=False)
tmp = pd.read_csv('~/Downloads/mints-2022-06-13-5pm.csv')
tmp[tmp.mint_address == 'EhuVN896QVypRreAt6mcJr6eKkKunVzsgSRz7qt4oeBr']
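# Note on precedence (assumption): because drop_duplicates(keep='last') runs on the
# concatenation [a, b, c, d], a mint_address appearing in several sources keeps the row
# from the source listed last, e.g.
#   pd.concat([pd.DataFrame({'collection': ['A'], 'mint_address': ['m1']}),
#              pd.DataFrame({'collection': ['B'], 'mint_address': ['m1']})
#             ]).drop_duplicates(subset=['mint_address'], keep='last')
# keeps the row with collection 'B'.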
def manual_clean():
for c in [ 'pred_price', 'attributes', 'feature_values', 'model_sales', 'listings', 'coefsdf', 'tokens' ]:
df = pd.read_csv('./data/{}.csv'.format(c))
@ -108,7 +133,149 @@ def manual_clean():
df['clean_token_id'] = df.token_id
df.to_csv('./data/{}.csv'.format(c), index=False)
def pull_from_solscan():
todo = [
['50a75e6d3d0b6d4a72b2f745fdba4b1c28bc774ca9629fe8e36053ae2fb396f8','Degen Egg']
, ['45e3f45d695e9e8775eed480cb0f5a6a957d47dcb3ed3800e454846dca9ab7fc','Genopets']
, ['a437071c6f9679e8431a072ae39421262bf289cc6ead21e38190d5b7b409e7f7','Shin Sengoku']
, ['d38349f2704e8cd1c538cc48fbea4b3e2596ac8da14b62c0eb3c07aeda7ae75e','SolStein']
, ['9e0593a4842ceb9ccdc510e6ffdf0d84f736bff2b58d5803c5002ace17df9fe0','Zillaz NFT']
, ['895d8f01108fbb6b28c5e32027c9c98e3054241927c8e59c304fa4763c5c88ea','enviroPass Tier 02']
, ['59c2a35d902f85feec4c774df503a0df2be263f763dcbcb73bce50c999fc2c78','The Fracture']
, ['e8dfb059b1dfc71cf97342a1c46793bc5e154909416a93a155929da5bba44a57','Suteki']
, ['271e0d68d069d80afbcb916e877831b060933b97e7b02e1cfb77e74b228b4745','Chillchat']
]
start = time.time()
data = []
meta = []
it = 0
tot = len(todo)
for collectionId, collection in todo:
it += 1
print('#{} / {}'.format(it, tot))
# collectionId = j['data']['collectionId']
# collection = j['data']['collection']
offset = 0
limit = 500
while True:
print(offset)
url = 'https://api.solscan.io/collection/nft?sortBy=nameDec&collectionId={}&offset={}&limit={}'.format(collectionId, offset, limit)
r = requests.get(url)
js = r.json()['data']
offset += limit
if len(js) == 0:
break
for j in js:
data += [[ collectionId, collection, j['info']['mint'] ]]
m = j['info']['meta']
m['mint_address'] = j['info']['mint']
# m['name'] = row['name']
# m['update_authority'] = update_authority
meta += [ m ]
it += 1
end = time.time()
print('Finished {} / {} in {} minutes'.format(it, tot, round((end - start) / 60.0, 1)))
df = pd.DataFrame(data, columns=['collection_id','collection','mint_address'])
df.to_csv('~/Downloads/solscan_collections.csv', index=False)
df[['collection','mint_address']].to_csv('~/Downloads/mints-2022-06-14-8am.csv', index=False)
df.groupby('collection').mint_address.count()
def collecitons_from_missing_tokens():
query = '''
WITH base AS (
SELECT block_timestamp::date AS date
, s.*
, ROW_NUMBER() OVER (ORDER BY sales_amount DESC) AS rn
FROM solana.fact_nft_sales s
LEFT JOIN solana.dim_labels l on s.mint = l.address
WHERE marketplace in ('magic eden v1', 'magic eden v2')
AND block_timestamp >= '2022-01-01'
AND l.address IS NULL
AND sales_amount >= 10
)
SELECT *
FROM base
WHERE rn % 20 = 0
ORDER BY sales_amount DESC
LIMIT 500
'''
missing = ctx.cursor().execute(query)
missing = pd.DataFrame.from_records(iter(missing), columns=[x[0] for x in missing.description])
missing = clean_colnames(missing)
missing.head()
headers = {
'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
}
it = 0
tot = len(missing)
data = []
for m in missing.mint.unique():
it += 1
if it % 10 == 0:
print('#{} / {} ({})'.format(it, tot, len(data)))
url = 'https://api-mainnet.magiceden.dev/v2/tokens/{}'.format(m)
r = requests.get(url, headers=headers)
j = r.json()
data.append(j)
pass
df = pd.DataFrame(data)
df.head()[['collection','mintAddress']]
df.to_csv('~/Downloads/tmp.csv', index=False)
need = df.groupby(['collection','updateAuthority']).mintAddress.count().reset_index().sort_values('mintAddress', ascending=0)
need = need[need.mintAddress > 1].rename(columns={'updateAuthority':'update_authority'})
need.to_csv('~/Downloads/missing.csv', index=False)
need.head()
sorted(need.collection.unique())
need['collection'] = need.collection.apply(lambda x: re.sub('_', ' ', x.title()).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub('\|', '-', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub('\)', '', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub('\(', '', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub('\'', '', x).strip() )
us = sorted(g[g.mintAddress > 1].updateAuthority.unique())
tot = len(us)
it = 0
for u in us:
it += 1
print('#{} / {} ({})'.format(it, tot, len(data)))
nfts = BLOCKCHAIN_API_RESOURCE.search_nfts(
update_authority = u
, update_authority_search_method = SearchMethod.EXACT_MATCH
)
print(u, len(nfts))
for n in nfts:
m = n['nft_metadata']
data += [[ m['update_authority'], m['mint'], m['data']['symbol'], m['data']['name'] ]]
def manual_tags():
d = {
'daaLrDfvcT4joui5axwR2gCkGAroruJFzyVsacU926g': 'Degenerate Ape Kindergarten'
, 'FbfGrZ3LKuGSsayK57DetzzyN7qKeNnDuLMu5bBSocwF': 'Botheads'
}
a = 'FbfGrZ3LKuGSsayK57DetzzyN7qKeNnDuLMu5bBSocwF'
c = 'Botheads'
labels = pd.DataFrame()
for a, c in d.items():
query = '''
SELECT DISTINCT instructions[1]:parsed:info:mint::string AS mint_address
FROM solana.fact_transactions
WHERE instructions[1]:parsed:info:mintAuthority = '{}'
'''.format(a)
df = ctx.cursor().execute(query)
df = pd.DataFrame.from_records(iter(df), columns=[x[0] for x in df.description])
df = clean_colnames(df)
df['collection'] = c
labels = labels.append(df)
labels.to_csv('~/Downloads/manual_labels.csv', index=False)
def mints_from_me():
##################################
# Get All ME Collections #
##################################
headers = {
'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
}
@ -144,6 +311,9 @@ def mints_from_me():
# lp_df.to_csv('./data/me_lp_collections.csv', index=False)
# lp_df = pd.read_csv('./data/me_lp_collections.csv')
###########################################
# Get 1 Mint From Each Collection #
###########################################
it = 0
l_data = []
old_l_df = pd.read_csv('./data/me_mints.csv')
@ -154,7 +324,7 @@ def mints_from_me():
it += 1
row = row[1]
print('Listings on {}...'.format(row['symbol']))
url = 'https://api-mainnet.magiceden.dev/v2/collections/{}/listings?offset=0&limit=1'.format(row['symbol'])
url = 'https://api-mainnet.magiceden.dev/v2/collections/{}/activities?offset=0&limit=1'.format(row['symbol'])
if row['symbol'] in seen:
print('Seen')
continue
@ -218,9 +388,38 @@ def mints_from_me():
# l_df = pd.DataFrame(l_data, columns=['symbol','name','mint_address'])
# l_df.to_csv('./data/me_mints.csv', index=False)
# get missing collections
query = '''
WITH base AS (
SELECT block_timestamp::date AS date
, s.*
, ROW_NUMBER() OVER (ORDER BY sales_amount DESC) AS rn
FROM solana.fact_nft_sales s
LEFT JOIN solana.dim_labels l on s.mint = l.address
WHERE marketplace in ('magic eden v1', 'magic eden v2')
AND block_timestamp >= '2022-01-01'
AND block_timestamp <= '2022-05-20'
AND l.address IS NULL
AND sales_amount > 20
)
SELECT *
FROM base
WHERE rn % 50 = 1
LIMIT 100
'''
missing = ctx.cursor().execute(query)
missing = pd.DataFrame.from_records(iter(missing), columns=[x[0] for x in missing.description])
missing = clean_colnames(missing)
######################################################
# Get Update Authorities For All Collections #
######################################################
l_df = pd.read_csv('./data/me_mints.csv')
len(l_df)
l_df.head()
m_old = pd.read_csv('./data/me_update_authorities.csv')
m_data = list(m_old.values)
m_old['seen'] = 1
m_data = list(m_old[['symbol','name','update_authority']].values)
seen = [ x[0] for x in m_data ]
print('Seen {} m_data'.format(len(seen)))
l_df = l_df[-l_df.symbol.isin(seen)]
@ -258,19 +457,583 @@ def mints_from_me():
m_df.to_csv('./data/me_update_authorities.csv', index=False)
m_df = pd.DataFrame(m_data, columns=['symbol','name','update_authority'])
m_df = m_df.drop_duplicates()
print('Adding {} rows to me_mints'.format(len(m_df) - len(m_old)))
print('Adding {} rows to me_update_authorities'.format(len(m_df) - len(m_old)))
m_df.to_csv('./data/me_update_authorities.csv', index=False)
m_df.tail(134).head(20)
m_df = m_df.tail(134)
query = '''
SELECT DISTINCT project_name, LOWER(project_name) AS lower_name
FROM crosschain.address_labels
WHERE blockchain = 'solana'
AND label_subtype = 'nf_token_contract'
AND project_name IS NOT NULL
'''
labels = ctx.cursor().execute(query)
labels = pd.DataFrame.from_records(iter(labels), columns=[x[0] for x in labels.description])
labels = clean_colnames(labels)
labels.to_csv('~/Downloads/tmp-la.csv', index=False)
######################################################
#     Check Update Authorities Against Labels        #
######################################################
m_df = pd.read_csv('./data/me_update_authorities.csv')
def f(x):
x = re.sub('\(|\)', '', x)
x = re.sub(' ', '_', x)
x = re.sub('\'', '', x)
return(x)
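# Hypothetical examples of f (directory-safe collection keys):
#   f("Okay Bears") -> "Okay_Bears"
#   f("asf (asf)")  -> "asf_asf"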
m_df['collection'] = m_df.name.apply(lambda x: f(x) )
m_df['seen'] = (-m_df.name.isin(m_df.name.tail(134).values)).astype(int)
m_df['lower_name'] = m_df.name.apply(lambda x: x.lower() )
seen = list(labels.lower_name.unique())
m_df['seen'] = m_df.lower_name.isin(seen).astype(int)
n_auth = m_df.groupby('update_authority').name.count().reset_index().rename(columns={'name':'n_auth'})
m_df = m_df.merge(n_auth)
len(m_df[m_df.seen == 0])
len(m_df[ (m_df.seen == 0) & (m_df.n_auth == 1)])
len(m_df[ (m_df.seen == 0) & (m_df.n_auth > 1)])
x = 'asf (asf)'
f(x)
m_df.to_csv('~/Downloads/tmp-m_df.csv', index=False)
len(m_df.name.unique())
need = list(m_df[m_df.seen == 0].update_authority.unique())
need = list(m_df[ (m_df.seen == 0) & (m_df.n_auth == 1) ].update_authority.unique())
len(need)
# need = need + [
# need = [
# 'CDgbhX61QFADQAeeYKP5BQ7nnzDyMkkR3NEhYF2ETn1k' # taiyo
# , 'DC2mkgwhy56w3viNtHDjJQmc7SGu2QX785bS4aexojwX' # DAA
# , 'daaLrDfvcT4joui5axwR2gCkGAroruJFzyVsacU926g' # Degen Egg
# , 'BL5U8CoFPewr9jFcKf3kE1BhdFS1J59cwGpeZrm7ZTeP' # Skullbot
# , 'DRGNjvBvnXNiQz9dTppGk1tAsVxtJsvhEmojEfBU3ezf' # Boryoku
# , '7hYkx2CNGRB8JE7X7GefX1ak1dqe7GxgYKbpfj9moE9D' # mindfolk
# , 'CjwNEVQFKk8YzZLCvvw6sNrjxiQW8dYDSzhTph18T7g5' # jelly rascals
# , 'EcxEqUj4RNgdGJwPE3ktsM99Ea9ThPmXHUV5g37Qm4ju' # women monkey
# , 'EQSoRhbN9fEEYXKEE5Lg63Mqf17P3JydcWTvDhdMJW1N' # hydrascripts
# , '75CPiM9ywLgxhii9SQsNoA1SH3h66o5EhrYsazHR5Tqk' # hydrascripts
# , 'aury7LJUae7a92PBo35vVbP61GX8VbyxFKausvUtBrt' # aurory
# , 'ET3LWbEL6q4aUSjsX5xLyWktCwqKh6qsQE5j6TDZtZBY' # enviropass
# , '8ERR2gYrvXcJFuoNAbPRvHXtrJnAXXHgXKkVviwz9R6C' # enviroPass
# , 'GRDCbZBP1x2JxYf3rQQoPFGzF57LDPy7XtB1gEMaCqGV' # Space Robots
# , 'GenoS3ck8xbDvYEZ8RxMG3Ln2qcyoAN8CTeZuaWgAoEA' # Genopet
# , 'STEPNq2UGeGSzCyGVr2nMQAzf8xuejwqebd84wcksCK' # stepn
# , 'HcS8iaEHwUino8wKzcgC16hxHodnPCyacVYUdBaSZULP' # BASC
# , 'AvkbtawpmMSy571f71WsWEn41ATHg5iHw27LoYJdk8QA' # THUG
# , 'GH4QhJznKEHHv44AqEH5SUohkUauWyAFtu5u8zUWUKL4' # StepN Shoebox
# , 'FTQmhcD7SNBWrVxTgQMFr7xL2aA6adfAJJPBxGKU4VsZ' # Solstien
# ]
need = m_df[m_df.update_authority.isin(need)]
# m_df[m_df.lower_name.isin(seen)]
# m_df[-m_df.lower_name.isin(seen)]
# tmp = m_df[['update_authority','collection']].drop_duplicates().groupby(['update_authority']).collection.count().reset_index().rename(columns={'collection':'n_collection'})
# tmp = tmp.sort_values('n_collection', ascending=0)
# m_df = m_df.merge(tmp)
# m_df = m_df.sort_values(by=['n_collection','update_authority','collection'], ascending=[0,0,0])
l_df = pd.read_csv('./data/me_mints.csv')
fix = need.merge(l_df[[ 'name','mint_address' ]])
# len(need.name.unique())
# len(fix.name.unique())
# fix = fix.sort_values(by=['update_authority','collection'], ascending=[0,0])
# fix.head()
# seen = []
# data = []
# meta = []
# fix = fix[-(fix.name.isin(seen))]
# start = time.time()
# it = 0
# tot = len(fix)
# scraper = cloudscraper.create_scraper()
# # for each collection
# for row in fix.iterrows():
# row = row[1]
# print(row['name'])
# if row['name'] in seen:
# print('Seen')
# continue
# url = 'https://api.solscan.io/nft/detail?mint={}'.format(row['mint_address'])
# t = scraper.get(url).text
# j = json.loads(t)
# # r = requests.get(url)
# # j = r.json()
# j['data']
# if not j['success']:
# print('Error')
# print(r)
# print(j)
# sleep(1)
# continue
# update_authority = j['data']['updateAuthority']
# collectionId = j['data']['collectionId']
# collection = j['data']['collection']
# offset = 0
# limit = 500
# while True:
# print(offset)
# url = 'https://api.solscan.io/collection/nft?sortBy=nameDec&collectionId={}&offset={}&limit={}'.format(collectionId, offset, limit)
# r = requests.get(url)
# js = r.json()['data']
# offset += limit
# if len(js) == 0:
# break
# for j in js:
# data += [[ update_authority, collectionId, collection, row['symbol'], row['name'], row['collection'], j['info']['mint'] ]]
# m = j['info']['meta']
# m['mint_address'] = j['info']['mint']
# m['name'] = row['name']
# m['update_authority'] = update_authority
# meta += [ m ]
# it += 1
# end = time.time()
# print('Finished {} / {} in {} minutes'.format(it, tot, round((end - start) / 60.0, 1)))
# old = pd.read_csv('./data/nft_label_tokens.csv')
# token_df = pd.DataFrame(data, columns=['update_authority','collectionId','solscan_collection','symbol','name','collection','mint'])
# token_df = token_df.append(old).drop_duplicates()
# token_df.to_csv('./data/nft_label_tokens.csv', index=False)
# old = pd.read_csv('./data/nft_label_metadata.csv')
# meta_df = pd.DataFrame(meta)
# meta_df = meta_df.append(old).drop_duplicates()
# meta_df.to_csv('./data/nft_label_metadata.csv', index=False)
# seen = list(token_df.name.unique())
# m_df.to_csv('~/Downloads/tmp.csv', index=False)
# tmp[tmp.collection > 1]
# m_df.head()
# def f(x):
# x = re.sub('\(|\)', '', x)
# x = re.sub(' ', '_', x)
# x = re.sub('\'', '', x)
# return(x)
# m_df['collection'] = m_df.name.apply(lambda x: f(x) )
# x = 'asf (asf)'
# f(x)
# query = '''
# WITH base AS (
# SELECT *
# , ROW_NUMBER() OVER (PARTITION BY project_name ORDER BY insert_date DESC) AS rn
# FROM crosschain.address_labels
# WHERE blockchain = 'solana'
# AND label_subtype = 'nf_token_contract'
# )
# SELECT *
# FROM base
# '''
# examples = ctx.cursor().execute(query)
# examples = pd.DataFrame.from_records(iter(examples), columns=[x[0] for x in examples.description])
# examples = clean_colnames(examples)
# examples.head()
# examples[examples.address_name == 'paradisedao'].head()
# examples[examples.address == 'GUXSatf5AAFKmuQgSgn4GoGzBEhwJ9WAQRxeVt1vZvkb'].head()
# # m_df = pd.read_csv('./data/me_update_authorities.csv')
# # fix = m_df[m_df.n_collection > 1].merge(examples[[ 'address','address_name' ]].rename(columns={'address_name':'name'}) )
# fix = m_df[m_df.n_collection > 1].merge(examples[[ 'address','address_name' ]].rename(columns={'address_name':'name'}) )
# len(m_df[m_df.n_collection > 1].name.unique())
# len(fix.name.unique())
# j = list(fix.address.unique())
# with open('./data/fix_mints.json', 'w') as f:
# json.dump(j, f)
# seen = list(examples.address.unique())
# seen = []
# need = df[-df.mint_address.isin(seen)].sort_values(['collection','mint_address'])
# CDgbhX61QFADQAeeYKP5BQ7nnzDyMkkR3NEhYF2ETn1k - taiyo
# DC2mkgwhy56w3viNtHDjJQmc7SGu2QX785bS4aexojwX - DAA
# DRGNjvBvnXNiQz9dTppGk1tAsVxtJsvhEmojEfBU3ezf - Boryoku
# 7hYkx2CNGRB8JE7X7GefX1ak1dqe7GxgYKbpfj9moE9D - mindfolk
# CjwNEVQFKk8YzZLCvvw6sNrjxiQW8dYDSzhTph18T7g5 - mindfolk
need = fix.copy().rename(columns={'name':'collection'})
# need = need.drop_duplicates(subset=['update_authority']).sort_values('collection').head(7).tail(1)
need = need.drop_duplicates(subset=['update_authority']).sort_values('collection')
need['collection'] = need.collection.apply(lambda x: re.sub('\|', '-', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub('\)', '', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub('\(', '', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub('\'', '', x).strip() )
need.collection.unique()
# need = need.drop_duplicates(subset=['collection']).sort_values('collection')
n = 0
# 1310 - 310
# need = need.tail(n).head(300).tail(25)
# need = need.tail(1009).head(17)
# need = need.tail(1009 - 17).head(17)
# 1-285, 1310-975
len(need)
# print(n)
mfiles = ['./data/mints/{}/{}_mint_accounts.json'.format(re.sub(' |-', '_', collection), update_authority) for collection, update_authority in zip(need.collection.values, need.update_authority.values) ]
seen = [ x for x in mfiles if os.path.exists(x) ]
seen = []
# for update authorities that have only 1 collection, we can just check metaboss once
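# (assumption about the approach: `metaboss snapshot mints --update-authority X` returns every
# mint whose update authority is X, so for authorities that map to exactly one collection a
# single call labels the whole collection)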
rpc = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
# need = need.tail(400)
it = 0
tot = len(need)
for row in need.iterrows():
it += 1
row = row[1]
collection = row['collection']
print('#{} / {}: {}'.format(it, tot, collection))
# if collection in seen:
# continue
update_authority = row['update_authority']
# print('Working on {}...'.format(collection))
collection_dir = re.sub(' |-', '_', collection)
dir = './data/mints/{}/'.format(collection_dir)
mfile = '{}{}_mint_accounts.json'.format(dir, update_authority)
if not os.path.exists(dir):
print(collection)
os.makedirs(dir)
# elif len(os.listdir(dir)) and os.path.exists(mfile):
# print('Already have {}.'.format(collection))
# print('Seen')
# continue
seen.append(update_authority)
os.system('metaboss -r {} -t 300 snapshot mints --update-authority {} --output {}'.format(rpc, update_authority, dir))
# write the mints to csv
data = []
for path in os.listdir('./data/mints/'):
if os.path.isdir('./data/mints/'+path):
collection = re.sub('_', ' ', path).strip()
for fname in os.listdir('./data/mints/'+path):
f = './data/mints/'+path+'/'+fname
if os.path.isfile(f) and '.json' in f:
with open(f) as file:
j = json.load(file)
for m in j:
data += [[ collection, m ]]
df = pd.DataFrame(data, columns=['collection','mint_address'])
df.collection.unique()
df.to_csv('./data/single_update_auth_labels.csv', index=False)
################################
# Multiple Authorities #
################################
rpc = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
need = list(m_df[ (m_df.seen == 0) & (m_df.n_auth > 1) ].update_authority.unique())
need = m_df[m_df.update_authority.isin(need)]
fix = need.merge(l_df[[ 'name','mint_address' ]])
need = fix.copy().rename(columns={'name':'collection'})
need = need.sort_values('collection').drop_duplicates(subset=['update_authority'], keep='first')
i = 5
sz = 112
t = len(need) - (sz * (i - 1)) if sz * i > len(need) else sz
print(t)
need = need.head(sz * i).tail(t)
# need = need.head(150 * 2).tail(150)
# need = need.head(150 * 3).tail(150)
# need = need.head(150 * 4).tail(150)
need['collection'] = need.collection.apply(lambda x: re.sub('\|', '-', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub('\)', '', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub('\(', '', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub('\'', '', x).strip() )
need.collection.unique()
it = 0
a = []
print(i)
for row in need.iterrows():
it += 1
# if it < 20:
# continue
# if it % 100 == 0:
# print('#{}/{}'.format(it, len(m_df)))
print('#{}/{}'.format(it, len(need)))
row = row[1]
collection = row['collection']
if collection in seen:
continue
update_authority = row['update_authority']
print('Working on {}...'.format(collection))
collection_dir = re.sub(' |-', '_', collection)
dir = './data/mints/{}/'.format(collection_dir)
mfile = '{}{}_mint_accounts.json'.format(dir, update_authority)
if not os.path.exists(dir):
print(collection)
os.makedirs(dir)
# elif len(os.listdir(dir)) and os.path.exists(mfile):
# print('Already have {}.'.format(collection))
# print('Seen')
# continue
print('LETS GOOO')
a.append(update_authority)
os.system('metaboss -r {} -t 300 snapshot mints --update-authority {} --output {}'.format(rpc, update_authority, dir))
# len(need)
# len(need.drop_duplicates(subset=['mint_address']))
# len(need.collection.unique())
# tot = len(need.collection.unique())
# it = 0
# # for each collection, get all the mints from metaboss
# for c in need.collection.unique():
# it += 1
# print('#{} / {}: {}'.format(it, tot, c))
# dir = './data/fix_labels_1/{}/'.format(re.sub(' ', '_', c))
odir = dir+'output/'
# if not os.path.exists(dir):
# print('Making dir {}'.format(dir))
# os.makedirs(dir)
if not os.path.exists(odir):
print('Making dir {}'.format(odir))
os.makedirs(odir)
# elif os.path.exists(dir+'mints.json'):
# print('Already Seen')
# continue
# ms = list(need[need.collection == c].mint_address.unique())
# with open(dir+'mints.json', 'w') as f:
# json.dump(ms, f)
os.system('metaboss -r {} -t 300 decode mint --list-file {} --output {}'.format(rpc, mfile, odir ))
##################################################
# Load All The Mints for Each Collection #
##################################################
# now that we have the mints, create a data frame with the info for each mint in each collection
data = []
seen = [ x[1] for x in data ]
it = 0
dirs = os.listdir('./data/mints/')
for path in dirs:
print(it)
it += 1
if os.path.isdir('./data/mints/'+path):
collection = re.sub('_', ' ', path).strip()
if not os.path.exists('./data/mints/'+path+'/output/'):
continue
fnames = os.listdir('./data/mints/'+path+'/output/')
print(collection, len(fnames))
for fname in fnames:
f = './data/mints/'+path+'/output/'+fname
if fname[:-5] in seen:
continue
if os.path.isfile(f) and '.json' in f:
try:
with open(f) as file:
j = json.load(file)
data += [[ collection, fname, j['name'], j['symbol'], j['uri'] ]]
except:
print('Error {}'.format(fname[:-5]))
##################################################
#    Build A DataFrame Of The Loaded Mints       #
##################################################
new_mints = pd.DataFrame(data, columns=['collection','mint_address','name','symbol','uri'])
# tmp = tmp[-(tmp.collection.isin(['Dskullys','Decimusdynamics']))]
n = len(new_mints[(new_mints.uri.isnull()) | (new_mints.uri == '')])
tot = len(new_mints)
pct = round(n * 100 / tot, 1)
print('{} ({}%) rows have no uri'.format(n, pct))
new_mints = new_mints[new_mints.uri != '']
# function to clean the name of each NFT (remove the number)
def f_cn(x):
if not x or x != x:
return(x)
if '#' in x[-6:]:
x = ''.join(re.split('#', x)[:-1]).strip()
elif bool(re.match('.+\s+[0-9]+', x)):
x = ' '.join(re.split(' ', x)[:-1]).strip()
return(x)
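# Hypothetical examples of f_cn:
#   f_cn('Degen Ape #1234')  -> 'Degen Ape'      (strips a trailing "#<number>")
#   f_cn('Pesky Penguin 42') -> 'Pesky Penguin'  (strips a trailing bare number)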
new_mints['clean_name'] = new_mints.name.apply(lambda x: f_cn(x) )
# determine, for each collection, whether we should look at collection-name-symbol, collection-symbol, or just collection to work out which collection each mint actually belongs to
# this logic exists because e.g. some collections only have a few names, so we can iterate over them, but some have a different name for each NFT, so we assume it's the same collection for all
a = new_mints.drop_duplicates(subset=['collection','clean_name','symbol']).groupby(['collection']).uri.count().reset_index().sort_values('uri', ascending=0)
symbol_only = a[a.uri > 10].collection.unique()
b = new_mints[new_mints.collection.isin(symbol_only)].drop_duplicates(subset=['collection','symbol']).groupby(['collection']).uri.count().reset_index().sort_values('uri', ascending=0)
collection_only = b[b.uri > 10].collection.unique()
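# (assumption about the thresholds: a collection with more than 10 distinct
# clean_name/symbol pairs is grouped by symbol only, and if it also has more than 10
# distinct symbols it is treated as a single collection)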
# now get the info for each collection-name-symbol combo
g1 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].groupby(['collection','clean_name','symbol']).head(1).reset_index()
g2 = new_mints[ ((new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].groupby(['collection','symbol']).head(1).reset_index()
g3 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & ((new_mints.collection.isin(collection_only))) ].groupby(['collection']).head(1).reset_index()
g = g1.append(g2).append(g3).drop_duplicates(subset=['mint_address'])
print('{} Total: {} all, {} collection-symbol {} collection'.format(len(g), len(g1), len(g2), len(g3)))
g.to_csv('~/Downloads/tmp-g.csv', index=False)
# iterate over each row to get what collection they are actually in
# by pulling data from the uri
uri_data = []
it = 0
tot = len(g)
print(tot)
errs = []
seen = [ x['uri'] for x in uri_data ]
# for row in g.iterrows():
for row in g[ -(g.uri.isin(seen)) ].iterrows():
row = row[1]
it += 1
if it % 100 == 0:
uri_df = pd.DataFrame(uri_data)[[ 'collection','name','symbol','row_symbol','row_collection','uri','row_clean_name','mint_address' ]]
uri_df.to_csv('~/Downloads/uri_df.csv', index=False)
print('#{} / {}: {}'.format(it, tot, row['collection']))
try:
r = requests.get(row['uri'])
j = r.json()
j['uri'] = row['uri']
j['row_collection'] = row['collection']
j['row_clean_name'] = row['clean_name']
j['row_symbol'] = row['symbol']
j['mint_address'] = row['mint_address']
uri_data += [j]
except:
print('Error')
errs.append(row)
uri_df = pd.DataFrame(uri_data)[[ 'collection','name','symbol','row_symbol','row_collection','uri','row_clean_name','mint_address' ]]
uri_df.to_csv('~/Downloads/uri_df.csv', index=False)
# for each row, parse the json from the uri
uri_df = pd.read_csv('~/Downloads/uri_df.csv')
def f(x, c):
x = str(x)
try:
n = json.loads(re.sub("'", "\"", x))[c]
if type(n) == list:
return(n[0])
return(n)
except:
try:
return(json.loads(re.sub("'", "\"", x))[c])
except:
try:
return(json.loads(re.sub("'", "\"", x))[0][c])
except:
try:
return(json.loads(re.sub("'", "\"", x))[0])
except:
return(x)
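# Hypothetical illustration of f: f("{'name': 'SolPunks', 'family': 'SolPunks'}", 'name')
# returns 'SolPunks'; if every parse attempt fails, the raw string is returned unchanged.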
# parse the json more
uri_df['parsed_collection'] = uri_df.collection.apply(lambda x: f(x, 'name') )
uri_df['parsed_family'] = uri_df.collection.apply(lambda x: f(x, 'family') )
uri_df['clean_name'] = uri_df.name.apply( lambda x: f_cn(x) )
# calculate what the collection name is
uri_df['use_collection'] = uri_df.parsed_collection.replace('', None).fillna( uri_df.clean_name )#.fillna( uri_df.row_symbol )
uri_df[uri_df.use_collection == 'nan'][['use_collection','parsed_collection','parsed_family','clean_name','name','collection','symbol','row_symbol','row_collection']].head()
uri_df[uri_df.use_collection == 'nan'][['use_collection','parsed_collection','parsed_family','clean_name','name','collection','symbol','row_symbol','row_collection']].to_csv('~/Downloads/tmp.csv', index=False)
len(uri_df)
# clean the collection name
def f1(x):
try:
if len(x['use_collection']) == 1:
return(x['clean_name'])
if bool(re.match('.+\s+#[0-9]+', x['use_collection'])):
return(''.join(re.split('#', x['use_collection'])[:-1]).strip())
if '{' in x['use_collection']:
return(x['clean_name'])
return(x['use_collection'].strip().title())
except:
return(x['use_collection'].strip().title())
uri_df['tmp'] = uri_df.apply(lambda x: f1(x), 1 )
uri_df[uri_df.tmp == 'Nan'][['use_collection','tmp']]
uri_df['use_collection'] = uri_df.apply(lambda x: f1(x), 1 )
sorted(uri_df.use_collection.unique())[:20]
sorted(uri_df.use_collection.unique())[-20:]
# clean the mint_address
uri_df['mint_address'] = uri_df.mint_address.apply(lambda x: re.sub('.json','', x))
uri_df.head()
uri_df = uri_df.fillna('None')
for i in range(2):
# for each collection-name-symbol combo, see how many have multiple mappings
a = uri_df.copy().fillna('None')
a = a[['row_collection','row_clean_name','row_symbol','use_collection']].drop_duplicates().groupby(['row_collection','row_clean_name','row_symbol']).use_collection.count().reset_index().rename(columns={'use_collection':'n_1'})
uri_df = merge(uri_df, a, ensure=True)
# for each collection-symbol combo, see how many have multiple mappings
a = uri_df.copy().fillna('None')
a = a[['row_collection','row_symbol','use_collection']].drop_duplicates().groupby(['row_collection','row_symbol']).use_collection.count().reset_index().rename(columns={'use_collection':'n_2'})
uri_df = merge(uri_df, a, ensure=True)
# for each collection combo, see how many have multiple mappings
a = uri_df.copy().fillna('None')
a = a[['row_collection','use_collection']].drop_duplicates().groupby(['row_collection']).use_collection.count().reset_index().rename(columns={'use_collection':'n_3'})
uri_df = merge(uri_df, a, ensure=True)
uri_df['n'] = uri_df.apply(lambda x: x['n_3'] if x['row_collection'] in collection_only else x['n_2'] if x['row_collection'] in symbol_only else x['n_1'], 1 )
print('{} / {} ({}%) have multiple collection-name-symbol mappings'.format(len(uri_df[uri_df.n > 1]), len(uri_df), round( 100.0 * len(uri_df[uri_df.n > 1]) / len(uri_df))))
# if there is multiple, use the parsed_family instead of the use_collection
uri_df['use_collection'] = uri_df.apply(lambda x: x['use_collection'] if x['n'] == 1 else x['parsed_family'], 1 )
del uri_df['n_1']
del uri_df['n_2']
del uri_df['n_3']
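# (assumption about why the loop runs twice: rows switched to parsed_family in the first
# pass get their mapping counts recomputed in the second pass)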
# only take rows where there is a single mapping
m = uri_df[uri_df.n==1][[ 'use_collection','row_collection','row_clean_name','row_symbol' ]].dropna().drop_duplicates()
m.columns = [ 'use_collection','collection','clean_name','symbol' ]
m_1 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].fillna('').merge(m.fillna(''), how='left')
m_2 = new_mints[ ((new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ][[ 'collection','mint_address','symbol' ]].fillna('').merge(m.fillna(''), how='left')
m_3 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & ((new_mints.collection.isin(collection_only))) ][[ 'collection','mint_address' ]].fillna('').merge(m.fillna(''), how='left')
len(m_1) + len(m_2) + len(m_3)
len(new_mints)
# m = new_mints.fillna('').merge(m.fillna(''), how='left')
m = m_1.append(m_2).append(m_3)
print('After all this, we have {}% of the mints'.format( round(len(m) * 100 / len(new_mints)) ))
len(new_mints)
len(m)
m['mint_address'] = m.mint_address.apply(lambda x: re.sub('.json', '', x) )
m = m[['mint_address','use_collection']].dropna().drop_duplicates()
m.columns = ['mint_address','collection']
m[m.collection.isnull()].head()
m[m.collection=='Nan'].head()
m = m[m.collection != 'Nan']
tmp = m.groupby('collection').mint_address.count().reset_index().sort_values('mint_address', ascending=0)
tmp.head()
m.to_csv('./data/mult_update_auth_labels.csv', index=False)
################
# DONE #
################
tokens = m.append(pd.read_csv('./data/tokens.csv')[['collection','mint_address']]).drop_duplicates(subset=['mint_address'], keep='last')
tokens.to_csv('./data/mints-2022-06-13-2pm.csv', index=False)
tokens.head()
m.to_csv('./data/mints-2022-06-09.csv', index=False)
m = pd.read_csv('./data/mints-2022-06-09.csv')
m.groupby('collection').head(1).to_csv('~/Downloads/tmp.csv', index=False)
len(m)
len(m.mint_address.unique())
m.head()
m.head()
# m = m.merge(symbol_map, how='left', on='symbol')
# m['use_collection'] = m.use_collection_x.fillna(m.use_collection_y)
len(new_mints)
len(m)
len(m[m.use_collection.isnull()])
len(m[m.use_collection.isnull()]) / len(m)
len(m[m.use_collection_x.isnull()]) / len(m)
m[m.use_collection.isnull()].fillna('').drop_duplicates(subset=['collection','clean_name','symbol']).to_csv('~/Downloads/tmp-3.csv', index=False)
m[m.use_collection.isnull()].drop_duplicates(subset=['collection']).to_csv('~/Downloads/tmp-3.csv', index=False)
a = uri_df[(uri_df.parsed_collection.isnull()) | (uri_df.parsed_collection == '')].groupby('row_clean_name').uri.count().reset_index()
a = uri_df[(uri_df.parsed_collection.isnull()) | (uri_df.parsed_collection == '')]
uri_df.head()
uri_df['row_clean_name'] = uri_df.row_clean_name.apply(lambda x: f_cn(x) )
id_map = uri_df
a.to_csv('~/Downloads/tmp-1.csv', index=False)
len(uri_df)
n = uri_df.groupby()
uri_df
uri_df
uri_df.head()
uri_df[['symbol','collection','']]
uri_df.head()
query = '''
SELECT DISTINCT project_name
@ -294,6 +1057,26 @@ def mints_from_me():
[x for x in seen if not x in m_df.tmp.unique()][:11]
m_df[m_df.symbol == 'apesquad']
m_df[m_df.symbol == 'chimp_frens']
url = 'https://api.solscan.io/nft/detail?mint=D5pT5HYPeQkHD6ryoHxnc2jdcUMYmjs6sS6LswbSDsuy'
us = sorted(m_df[m_df.n_collection > 1].update_authority.unique())
u = us[1]
m_df[m_df.update_authority == u]
m_df[m_df.mint == 'G3xiAFZEp49BJc8nNrDJxwTXZ34teKH7CRf5KTGakxte']
data = []
for u in us[:10]:
nfts = BLOCKCHAIN_API_RESOURCE.search_nfts(
update_authority = u
, update_authority_search_method = SearchMethod.EXACT_MATCH
)
print(u, len(nfts))
for n in nfts:
m = n['nft_metadata']
data += [[ m['update_authority'], m['mint'], m['data']['symbol'], m['data']['name'] ]]
nft_df = pd.DataFrame(data, columns=['update_authority','mint','symbol','name'])
len(nft_df.update_authority.unique())
nft_df['collection'] = nft_df.name.apply(lambda x: re.split('#', x)[0].strip() )
nft_df.groupby(['symbol','collection']).mint.count()
nft_df.groupby(['symbol','name']).mint.count()
print(len(seen))
# m_df = m_df.merge(lp_df)
len(m_df)
@ -335,7 +1118,6 @@ def mints_from_me():
# os.makedirs(dir_mints)
# os.system('metaboss -r {} -t 300 decode mint --list-file {} --output {}'.format(rpc, fname, dir_mints))
data = []
for path in os.listdir('./data/mints/'):
if os.path.isdir('./data/mints/'+path):

Binary file not shown.

View File

@ -1,12 +1,132 @@
import re
import os
import json
import pandas as pd
import snowflake.connector
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
from solana_model import get_sales
from scrape_sol_nfts import clean_name
def get_ctx():
usr = os.getenv('SNOWFLAKE_USR')
pwd = os.getenv('SNOWFLAKE_PWD')
# with open('snowflake.pwd', 'r') as f:
# pwd = f.readlines()[0].strip()
# with open('snowflake.usr', 'r') as f:
# usr = f.readlines()[0].strip()
ctx = snowflake.connector.connect(
user=usr,
password=pwd,
account='vna27887.us-east-1'
)
return(ctx)
def clean_colnames(df):
names = [ x.lower() for x in df.columns ]
df.columns = names
return(df)
def overlap():
query = '''
WITH sales AS (
SELECT l.label AS collection, SUM(sales_amount) AS volume, MIN(block_timestamp::date) AS first_sale_date
FROM solana.fact_nft_sales s
JOIN solana.dim_labels l ON LOWER(l.address) = LOWER(s.mint)
WHERE block_timestamp >= CURRENT_DATE - 30
GROUP BY 1
), base AS (
SELECT *
, ROW_NUMBER() OVER (ORDER BY volume DESC) AS volume_rank
FROM sales
ORDER BY volume DESC
LIMIT 50
), b2 AS (
SELECT DISTINCT collection, first_sale_date, volume_rank, purchaser, mint
FROM solana.fact_nft_sales s
JOIN solana.dim_labels l ON LOWER(l.address) = LOWER(s.mint)
JOIN base b ON b.collection = l.label
UNION
SELECT DISTINCT collection, first_sale_date, volume_rank, purchaser, mint
FROM solana.fact_nft_mints m
JOIN solana.dim_labels l ON LOWER(l.address) = LOWER(m.mint)
JOIN base b ON b.collection = l.label
)
SELECT DISTINCT INITCAP(collection) AS collection, first_sale_date, date_trunc('month', first_sale_date) AS first_sale_month, volume_rank, purchaser, mint
FROM b2
'''
ctx = get_ctx()
df = ctx.cursor().execute(query)
df = pd.DataFrame.from_records(iter(df), columns=[x[0] for x in df.description])
df = clean_colnames(df)
df[df.collection == 'okay bears']
len(df[df.collection == 'okay bears'].mint.unique())
data = []
list(df.collection.unique()).index(a)
list(df.collection.unique()).index(b)
cur = df[df.volume_rank <= 50]
for a in cur.collection.unique():
print(a)
a1 = set(cur[cur.collection == a].purchaser.unique())
ar = cur[cur.collection == a].volume_rank.values[0]
am = cur[cur.collection == a].first_sale_month.values[0]
# for b in cur[cur.collection > a].collection.unique():
for b in cur.collection.unique():
b1 = set(cur[cur.collection == b].purchaser.unique())
br = cur[cur.collection == b].volume_rank.values[0]
bm = cur[cur.collection == b].first_sale_month.values[0]
data += [[ a, b, int(a < b), am, bm, ar, br, len(a1), len(b1), len(a1.intersection(b1)) ]]
cur = pd.DataFrame(data, columns=['col_1','col_2','include','am','bm','r_1','r_2','n_1','n_2','n_int'])
cur['pct'] = cur.apply(lambda x: x['n_int'] / min(x['n_1'], x['n_2']), 1 )
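# (note, assumption about the metric: pct is the overlap coefficient between the two
# purchaser sets, n_int / min(n_1, n_2), so it equals 1.0 whenever the smaller buyer
# base is fully contained in the larger one)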
cur = cur[cur.n_int.notnull()]
cur.to_csv('~/Downloads/overlap.csv', index=False)
cur.include.unique()
def add_back_metadata():
query = '''
SELECT *
FROM solana.dim_nft_metadata
WHERE LOWER(project_name) IN (
'degods'
, 'astrals'
, 'solstein'
, 'solgods'
, 'okay bears'
, 'meerkat millionaires'
, 'catalina whale mixer'
, 'citizens by solsteads'
, 'defi pirates'
)
'''
ctx = get_ctx()
mdf = ctx.cursor().execute(query)
mdf = pd.DataFrame.from_records(iter(mdf), columns=[x[0] for x in mdf.description])
print('Loaded {} metadata'.format(len(mdf)))
mdf = clean_colnames(mdf)
mdf = mdf[[ 'contract_name','token_id','token_metadata' ]]
# json.loads must be applied row by row; loading the whole values array at once fails
m = [json.loads(x) for x in mdf.token_metadata.values]
data = []
collection = mdf.contract_name.values
token_id = mdf.token_id.values
for i in range(len(m)):
for k, v in m[i].items():
data += [[ collection[i], token_id[i], k, v ]]
old = pd.read_csv('./data/metadata.csv')
metadata = pd.DataFrame(data, columns=['collection','token_id','feature_name','feature_value'])
del old['chain']
old = old.append(metadata)
old['collection'] = old.collection.apply(lambda x: clean_name(x) )
old = old.drop_duplicates(subset=['collection','token_id','feature_name'], keep='last')
old[old.collection == 'Cets On Creck'].feature_name.unique()
old[old.collection == 'Cets on Creck'].feature_name.unique()
tmp = old[['collection','feature_name']].drop_duplicates().groupby('collection').feature_name.count().reset_index()
tmp.to_csv('~/Downloads/tmp-1.csv', index=False)
old.to_csv('./data/metadata.csv', index=False)
def add_sf_metadata():
old = pd.read_csv('./data/metadata.csv')
l0 = len(old)
@ -141,8 +261,25 @@ def add_att_count():
print('Adding {} rows'.format(l1 - l0))
m_df.to_csv('./data/metadata.csv', index=False)
def tmp():
m1 = pd.read_csv('./data/metadata.csv')
m2 = pd.read_csv('./data/metadata_2.csv')
t1 = pd.read_csv('./data/tokens.csv')
t2 = pd.read_csv('./data/tokens_2.csv')
m = m1.append(m2).drop_duplicates(keep='last')
t = t1.append(t2).drop_duplicates(keep='last')
t.to_csv('./data/tokens.csv', index=False)
m.to_csv('./data/metadata.csv', index=False)
def add_rarities():
include = [ 'DeGods' ]
m_df = pd.read_csv('./data/metadata.csv')
# m_df = m_df[-m_df.collection.isin([''])]
g0 = m_df.groupby('collection').token_id.count().reset_index()
m_df['collection'] = m_df.collection.apply(lambda x: clean_name(x))
# m_df = m_df[m_df.collection.isin(include)]
# m_df['feature_name'] = m_df.feature_name.fillna(m_df.name)
# m_df['feature_value'] = m_df.feature_value.fillna(m_df.value)
for c in [ 'name','value','rarity' ]:
@ -164,6 +301,8 @@ def add_rarities():
# m_df[m_df.collection == 'BAYC'].feature_name.unique()
tokens = pd.read_csv('./data/tokens.csv')[['collection','token_id','nft_rank']]
tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x))
# tokens = tokens[tokens.collection.isin(include)]
tokens[((tokens.collection == 'Pesky Penguins')) & (tokens.token_id=='6437')]
tokens[((tokens.collection == 'Pesky Penguins')) & (tokens.token_id==6437)]
tokens[tokens.collection == 'SOLGods']
@ -287,6 +426,10 @@ def add_rarities():
sorted(m_df.collection.unique())
l1 = len(m_df)
g1 = m_df.groupby('collection').token_id.count().reset_index()
g = g0.merge(g1, how='outer', on=['collection'])
g['dff'] = g.token_id_y - g.token_id_x
print(g[g.dff != 0].sort_values('dff', ascending=0))
print('Adding {} rows'.format(l1 - l0))
# m_df[m_df.collection == 'Galactic Angels']
# m_df[ (m_df.collection == 'Galactic Angels') & (m_df.token_id == '1') ]

View File

@ -23,7 +23,7 @@ import cloudscraper
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
os.environ['PATH'] += os.pathsep + '/Users/kellenblumberg/shared/'
from utils import clean_token_id, merge, clean_name
from utils import clean_token_id, get_ctx, merge, clean_name
# howrare.is api
# https://api.howrare.is/v0.1/collections/smb/only_rarity
@ -34,66 +34,28 @@ from utils import clean_token_id, merge, clean_name
# old = pd.read_csv('./data/tokens.csv')
# metadata[(metadata.collection == 'Galactic Punks') & (metadata.feature_name=='attribute_count')].drop_duplicates(subset=['feature_value']).merge(old)
# url = 'https://api.solscan.io/collection/nft?sortBy=nameDec&collectionId=f046bec0889c9d431ce124a626237e2236bc2527051d32ed31f6b5e6dc230669&offset=0&limit=500'
# r = requests.get(url)
# j = r.json()
# j.keys()
# len(j['data'])
# j['data'][0]
def how_rare_is_api():
url = 'https://api.howrare.is/v0.1/collections'
r = requests.get(url)
j = r.json()
j['result'].keys()
j['result']['data'][:10]
c_df = pd.DataFrame(j['result']['data']).sort_values('floor_marketcap', ascending=0)
c_df.head(16)
seen = [ 'smb','aurory','degenapes','thugbirdz','degods','okay_bears','catalinawhalemixer','cetsoncreck','stonedapecrew','solgods' ]
len(j['result']['data'])
t_data = []
metadata = pd.DataFrame()
d = {
'Degen Apes': 'degenapes'
, 'Pesky Penguins': 'peskypenguinclub'
, 'Aurory': 'aurory'
, 'Solana Monkey Business': 'smb'
, 'Thugbirdz': 'thugbirdz'
}
# for collection, url in d.items():
# redo trippin ape tribe
for row in c_df.iterrows():
row = row[1]
collection = row['name']
url = row['url'][1:]
print('Working on collection {}, {}, {}'.format(collection, len(t_data), len(metadata)))
if url in seen or (len(metadata) and collection in metadata.collection.unique()):
print('Seen!')
continue
# collection = 'Cets on Creck'
# collection = 'SOLGods'
# collection = 'Meerkat Millionaires'
# collection = d['url'][1:]
# url = 'https://api.howrare.is/v0.1/collections'+d['url']
# url = 'https://api.howrare.is/v0.1/collections/meerkatmillionaires'
url = 'https://api.howrare.is/v0.1/collections/'+url
r = requests.get(url)
j = r.json()
for i in j['result']['data']['items']:
token_id = int(i['id'])
nft_rank = int(i['rank'])
mint = i['mint']
image = i['image']
t_data += [[ collection, token_id, nft_rank, mint, image ]]
# m_data += [[ collection, token_id, nft_rank ]]
m = pd.DataFrame(i['attributes'])
m['token_id'] = token_id
m['collection'] = collection
# metadata = metadata.append(m)
metadata = pd.concat([metadata, m])
old = pd.read_csv('./data/tokens.csv')
def add_to_df(t_data):
old = pd.read_csv('./data/tokens_2.csv')
sorted(old.collection.unique())
l0 = len(old)
do_merge = False
tokens = pd.DataFrame(t_data, columns=['collection','token_id','nft_rank','mint_address','image_url'])
len(tokens)
tokens[tokens.nft_rank.isnull()]
tokens['collection'] = tokens.collection.apply(lambda x: 'Catalina Whale Mixer' if x == 'Catalina Whales' else x )
metadata['collection'] = metadata.collection.apply(lambda x: 'Catalina Whale Mixer' if x == 'Catalina Whales' else x )
rem = [ 'Jikan Studios','Fine Fillies' ]
print(tokens.groupby('collection').token_id.count())
tokens['clean_token_id'] = tokens.token_id
tokens['chain'] = 'Solana'
tokens = tokens[-tokens.collection.isin(rem)]
if do_merge:
old['token_id'] = old.token_id.astype(str)
tokens['token_id'] = tokens.token_id.astype(str)
@ -106,42 +68,230 @@ def how_rare_is_api():
old['clean_token_id'] = old.clean_token_id.fillna(old.token_id)
old['chain'] = old.chain.fillna('Solana')
else:
old = old.append(tokens)
# old = old.append(tokens)
old = pd.concat( [old, tokens] )
old['token_id'] = old.token_id.astype(str)
old = old.drop_duplicates(subset=['collection','token_id'], keep='last')
print('Adding {} rows'.format(len(old) - l0))
old[old.collection.isin(tokens.collection.unique())]
old[(old.collection.isin(tokens.collection.unique())) & (old.token_id == '6437')]
old[old.nft_rank.isnull()].groupby('collection').token_id.count()
old = old[-old.collection.isin(['Astrals','Dazedducks','Nyanheroes','Shadowysupercoder','Taiyorobotics'])]
old.to_csv('./data/tokens_2.csv', index=False)
# tokens.to_csv('./data/tokens_2.csv', index=False)
def compile():
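# pull the set of mint addresses that already have labels in Snowflake, then build a
# mint_address -> collection CSV covering only the mints that still need labels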
ctx = get_ctx()
query = 'SELECT DISTINCT address FROM silver_CROSSCHAIN.ADDRESS_LABELS'
seen = ctx.cursor().execute(query)
seen = pd.DataFrame.from_records(iter(seen), columns=[x[0] for x in seen.description])
seen = sorted(list(seen.ADDRESS.unique()))
tokens = pd.read_csv('./data/tokens.csv')
tokens = tokens[tokens.chain == 'Solana']
single_update_auth_labels = pd.read_csv('./data/single_update_auth_labels.csv')
mult_update_auth_labels = pd.read_csv('./data/mult_update_auth_labels.csv')
df = tokens.append(single_update_auth_labels).append(mult_update_auth_labels)
df = df[ (df.collection != 'Nan') & (df.collection != 'nan') & (df.collection.notnull()) ]
df = df[-(df.mint_address.isin(seen))]
df = df.drop_duplicates(subset=['mint_address'], keep='first')
# len(df)
# len(df.collection.unique())
# df.head()
# df.mint_address.tail(11000).head(5)
# df[df.mint_address == '2GgPNKGyzAQL4mriuH4kBpntYCNVSM2pQfzdsu3p8du5']
# df['seen'] = df.mint_address.isin(seen).astype(int)
# tmp = df[df.seen == 0].groupby('collection').mint_address.count().reset_index().sort_values('mint_address', ascending=0)
# tmp.head(40)
# tmp.mint_address.sum()
df[df.mint_address.isnull()]
df[['mint_address','collection']].to_csv('~/Downloads/solana-nft-labels-06-29.csv', index=False)
def add_to_df(t_data, metadata, exclude_new = False):
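# merge freshly scraped rank/mint/image rows (t_data) and attribute rows (metadata) into
# the existing tokens.csv / metadata.csv; with exclude_new = True, collections present in
# the new pull are dropped from the old data first so they are fully replaced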
old = pd.read_csv('./data/tokens.csv')
g0 = old.groupby('collection').token_id.count().reset_index()
sorted(old.collection.unique())
l0 = len(old)
do_merge = False
tokens = pd.DataFrame(t_data, columns=['collection','token_id','nft_rank','mint_address','image_url'])
len(tokens)
tokens[tokens.nft_rank.isnull()]
tokens['collection'] = tokens.collection.apply(lambda x: 'Catalina Whale Mixer' if x == 'Catalina Whales' else x )
# rem = [ 'Jikan Studios','Fine Fillies' ]
# print(tokens.groupby('collection').token_id.count())
metadata['collection'] = metadata.collection.apply(lambda x: 'Catalina Whale Mixer' if x == 'Catalina Whales' else x )
tokens['clean_token_id'] = tokens.token_id
tokens['chain'] = 'Solana'
# tokens = tokens[-tokens.collection.isin(rem)]
# metadata = metadata[-metadata.collection.isin(rem)]
if do_merge:
old['token_id'] = old.token_id.astype(str)
tokens['token_id'] = tokens.token_id.astype(str)
old = old.merge(tokens, how='left', on=['collection','token_id'])
old[old.collection == 'Solana Monkey Business']
for c in [ 'nft_rank','mint_address','image_url' ]:
old[c] = old[c+'_x'].fillna(old[c+'_y'])
del old[c+'_x']
del old[c+'_y']
old['clean_token_id'] = old.clean_token_id.fillna(old.token_id)
old['chain'] = old.chain.fillna('Solana')
else:
# old = old.append(tokens)
old['collection'] = old.collection.apply(lambda x: clean_name(x))
tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x))
if exclude_new:
rem = tokens.collection.unique()
old = old[-(old.collection.isin(rem))]
old = pd.concat( [old, tokens] )
old['token_id'] = old.token_id.astype(str)
old = old.drop_duplicates(subset=['collection','token_id'], keep='last')
g1 = old.groupby('collection').token_id.count().reset_index()
g = g0.merge(g1, how='outer', on=['collection']).fillna(0)
g['dff'] = g.token_id_y - g.token_id_x
print(g[g.dff != 0].sort_values('dff', ascending=0))
g[g.dff != 0].sort_values('dff', ascending=0).to_csv('~/Downloads/tmp.csv', index=False)
print('Adding {} rows'.format(len(old) - l0))
old = old[old.collection != 'Solanamonkeybusiness (Smb)']
# old[old.collection.isin(tokens.collection.unique())]
# old[(old.collection.isin(tokens.collection.unique())) & (old.token_id == '6437')]
old[old.nft_rank.isnull()].groupby('collection').token_id.count()
# old = old[-old.collection.isin(['Astrals','Dazedducks','Nyanheroes','Shadowysupercoder','Taiyorobotics'])]
old.to_csv('./data/tokens.csv', index=False)
# tokens.to_csv('./data/tokens_2.csv', index=False)
old = pd.read_csv('./data/metadata.csv')
a = old[['collection','token_id']].drop_duplicates()
a['exclude'] = 0
a['token_id'] = a.token_id.astype(str)
metadata['token_id'] = metadata.token_id.astype(str)
m = metadata.merge(a, how='left')
m = m[m.exclude.isnull()]
len(m[m.exclude.isnull()].token_id.unique())
del m['exclude']
# old = old[-(old.collection == 'Meerkat Millionaires Cc')]
print(sorted(old.collection.unique()))
g0 = old.groupby('collection').token_id.count().reset_index()
l0 = len(old)
metadata.collection.unique()
old['collection'] = old.collection.apply(lambda x: clean_name(x))
metadata['collection'] = metadata.collection.apply(lambda x: clean_name(x))
if exclude_new:
rem = metadata.collection.unique()
old = old[-(old.collection.isin(rem))]
# old = old[-old.collection.isin(['Astrals','Dazedducks','Nyanheroes','Shadowysupercoder','Taiyorobotics'])]
# a = old[['collection','token_id']].drop_duplicates()
# a['exclude'] = 0
# a['token_id'] = a.token_id.astype(str)
# metadata['token_id'] = metadata.token_id.astype(str)
# m = metadata.merge(a, how='left')
# m = m[m.exclude.isnull()]
# len(m[m.exclude.isnull()].token_id.unique())
# del m['exclude']
# old = old[-(old.collection == 'Meerkat Millionaires Cc')]
# print(sorted(old.collection.unique()))
# metadata.collection.unique()
# metadata = pd.DataFrame(t_data, columns=['collection','token_id','nft_rank','mint_address','image_url'])
# old = old.merge(tokens, how='left', on=['collection','token_id'])
old = old.append(m[['collection','token_id','name','value']].rename(columns={'name':'feature_name','value':'feature_value'}) )
# old = old.append(m[['collection','token_id','name','value']].rename(columns={'name':'feature_name','value':'feature_value'}) )
old = pd.concat( [old, metadata[['collection','token_id','name','value']].rename(columns={'name':'feature_name','value':'feature_value'})] )
old['token_id'] = old.token_id.astype(str)
old = old.drop_duplicates(subset=['collection','token_id','feature_name'])
old = old.drop_duplicates(subset=['collection','token_id','feature_name'], keep='last')
# old['nft_rank'] = old.nft_rank_y.fillna(old.nft_rank_y)
# del old['nft_rank_x']
g1 = old.groupby('collection').token_id.count().reset_index()
g = g0.merge(g1, how='outer', on=['collection']).fillna(0)
g['dff'] = g.token_id_y - g.token_id_x
print(g[g.dff != 0].sort_values('dff', ascending=0))
# del old['nft_rank_y']
print('Adding {} rows'.format(len(old) - l0))
print(old.groupby('collection').token_id.count())
old[old.collection.isin(metadata.collection.unique())]
old[(old.collection == 'Catalina Whale Mixer') & (old.token_id == '1206')]
# print(old.groupby('collection').token_id.count())
# old[old.collection.isin(metadata.collection.unique())]
# old[(old.collection == 'Catalina Whale Mixer') & (old.token_id == '1206')]
old.to_csv('./data/metadata.csv', index=False)
# metadata.to_csv('./data/metadata_2.csv', index=False)
def how_rare_is_api():
ctx = get_ctx()
query = '''
SELECT DISTINCT LOWER(project_name) AS lower_collection
FROM solana.core.dim_nft_metadata
'''
df = ctx.cursor().execute(query)
df = pd.DataFrame.from_records(iter(df), columns=[x[0] for x in df.description])
url = 'https://api.howrare.is/v0.1/collections'
r = requests.get(url)
j = r.json()
j['result'].keys()
j['result']['data'][:10]
c_df = pd.DataFrame(j['result']['data']).sort_values('floor_marketcap', ascending=0)
c_df['lower_collection'] = c_df.url.apply(lambda x: x.lower().strip() )
seen = sorted(df.LOWER_COLLECTION.apply(lambda x: re.sub(' |_|\'', '', x) ).values)
# seen[:300]
# x = 590
# seen[x:x+50]
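# match howrare.is collections against Snowflake metadata by normalizing names: strip
# spaces, underscores, and apostrophes, lowercase, then flag a match on either the url
# slug (seen_1) or the display name (seen_2)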
c_df['seen_1'] = c_df.url.apply(lambda x: re.sub(' |_|\'', '', x[1:]).lower() in seen ).astype(int)
c_df['seen_2'] = c_df.name.apply(lambda x: re.sub(' |_|\'', '', x).lower() in seen ).astype(int)
c_df['seen'] = (c_df.seen_1 + c_df.seen_2 > 0).astype(int)
c_df.head()
c_df.seen.sum()
c_df[c_df.seen == 0].head(10)
# c_df.head(16)
seen = [ 'smb','aurory','degenapes','thugbirdz','degods','okay_bears','catalinawhalemixer','cetsoncreck','stonedapecrew','solgods' ]
c_df = c_df[-(c_df.url.isin([ '/'+x for x in seen]))]
# rem = [ 'kaiju','jikanstudios' ]
# c_df = c_df[-(c_df.url.isin([ '/'+x for x in rem]))]
# seen = list(pd.read_csv('./data/tokens.csv').collection.unique())
# c_df = c_df[-(c_df.name.isin(seen))]
# len(j['result']['data'])
# c_df = c_df[c_df.url.isin(['/blocksmithlabs'])]
# c_df = c_df[c_df.url.isin(['/generousrobotsdao','/thestonedfrogs'])]
c_df = c_df[c_df.seen == 0]
sorted(c_df.url.unique())
it = 0
tot = len(c_df)
# c_df.head()
# c_df = c_df[c_df.url != '/midnightpanthers']
t_data = []
m_data = []
# metadata = pd.DataFrame()
for row in c_df.iterrows():
it += 1
row = row[1]
collection = row['name']
print('#{} / {}: {}'.format(it, tot, collection))
url = row['url'][1:]
if it > 1:
assert(len(t_data))
# assert(len(m_data))  # the attribute branch below sits under 'if False', so m_data stays empty
print('Working on collection {}, {}, {}'.format(collection, len(t_data), len(m_data)))
# if url in seen or (len(metadata) and collection in metadata.collection.unique()):
# print('Seen!')
# continue
# collection = 'Cets on Creck'
# collection = 'SOLGods'
# collection = 'Meerkat Millionaires'
# collection = d['url'][1:]
# url = 'https://api.howrare.is/v0.1/collections'+d['url']
# url = 'https://api.howrare.is/v0.1/collections/meerkatmillionaires'
# url = 'https://api.howrare.is/v0.1/collections/'+url+'/only_rarity'
url = 'https://api.howrare.is/v0.1/collections/'+url
r = requests.get(url)
j = r.json()
for i in j['result']['data']['items']:
try:
token_id = int(i['id'])
if True:
nft_rank = int(i['rank'])
mint = i['mint']
image = i['image']
t_data += [[ collection, token_id, nft_rank, mint, image ]]
if False:
for d in i['attributes']:
d['token_id'] = token_id
d['collection'] = collection
m_data += [ d ]
# metadata = metadata.append(m)
# metadata = pd.concat([metadata, m])
except:
print('Error')
# add_to_df(t_data)
metadata = pd.DataFrame(m_data)
metadata
add_to_df(t_data, metadata, True)
metadata.head()
metadata.value.unique()
def convert_collection_names():
for c in [ 'pred_price', 'attributes', 'feature_values', 'model_sales', 'listings', 'coefsdf', 'tokens' ]:

View File

@ -1,6 +1,6 @@
import os
import json
# import psycopg2
import psycopg2
import pandas as pd
import requests
@ -26,6 +26,49 @@ def thorchain():
def f():
conn = psycopg2.connect("dbname=suppliers user=postgres password=postgres")
conn = psycopg2.connect("dbname=suppliers user=postgres password=postgres")
conn = psycopg2.connect(
host="vic5o0tw1w-repl.twtim97jsb.tsdb.cloud.timescale.com",
user="tsdbadmin",
password="yP4wU5bL0tI0kP3k"
)
query = '''
SELECT from_addr
, to_addr
, asset
, amount_e8
, block_timestamp
, COUNT(1) AS n
FROM midgard.transfer_events
WHERE block_timestamp < 1650000000000000000
AND block_timestamp >= 1640000000000000000
GROUP BY 1, 2, 3, 4, 5
HAVING COUNT(1) > 1
'''
df = pd.read_sql_query(query, conn)
# cur.execute(query)  # 'cur' is never defined; pd.read_sql_query above already runs the query
it = 0
qs = []
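# block_timestamp here is in nanoseconds; step through ~35-day windows (3e15 ns) and
# write each window's duplicate-transfer query out to its own text file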
for i in range(1618000000000000000, 1657000000000000000, 3000000000000000):
print(i)
it += 1
query = '''
SELECT from_addr
, to_addr
, asset
, amount_e8
, block_timestamp
, COUNT(1) AS n
FROM midgard.transfer_events
WHERE block_timestamp >= {}
AND block_timestamp < {}
GROUP BY 1, 2, 3, 4, 5
HAVING COUNT(1) > 1
'''.format(i, i + 3000000000000000)
with open('/Users/kellenblumberg/Downloads/query_{}.txt'.format(it), 'w') as f:
f.write(query)
def read_tokenlist():

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,7 @@
import os
import re
import pandas as pd
import snowflake.connector
clean_names = {
@ -20,9 +22,26 @@ clean_names = {
,'mayc': 'MAYC'
,'solgods': 'SOLGods'
,'meerkatmillionairescc': 'Meerkat Millionaires'
,'ggsg:galacticgeckos': 'Galactic Geckos'
,'solstein': 'SolStein'
# ,'stonedapecrew': 'Stoned Ape Crew'
}
def get_ctx():
usr = os.getenv('SNOWFLAKE_USR')
pwd = os.getenv('SNOWFLAKE_PWD')
# with open('snowflake.pwd', 'r') as f:
# pwd = f.readlines()[0].strip()
# with open('snowflake.usr', 'r') as f:
# usr = f.readlines()[0].strip()
ctx = snowflake.connector.connect(
user=usr,
password=pwd,
account='vna27887.us-east-1'
)
return(ctx)
def format_num(x):
return('{:,}'.format(round(x, 2)))
@ -48,6 +67,7 @@ def clean_name(name):
name = re.sub('-', ' ', name)
name = re.sub(' On ', ' on ', name)
name = re.sub('Defi ', 'DeFi ', name)
# name = re.sub(r'[^a-zA-Z0-9\s]', '', name)
return(name)

BIN
viz/.DS_Store vendored

Binary file not shown.

View File

@ -33,6 +33,7 @@ clean_names = {
,'solgods': 'SOLGods'
,'meerkatmillionairescc': 'Meerkat Millionaires'
,'stonedapecrew': 'Stoned Ape Crew'
,'stonedapecrew': 'Stoned Ape Crew'
}
def clean_name(name):
@ -44,6 +45,7 @@ def clean_name(name):
name = re.sub('-', ' ', name)
name = re.sub(' On ', ' on ', name)
name = re.sub('Defi ', 'DeFi ', name)
# name = re.sub(r'[^a-zA-Z0-9\s]', '', name)
return(name)
#########################
@ -80,6 +82,7 @@ def clean_token_id(df, data_folder):
del df['clean_token_id']
return(df)
# '~/git/nft-deal-score/viz/'
def add_sales(query, usr, pwd, do_clean_token_id = False, data_folder = '/rstudio-data/'):
fname = data_folder+'nft_deal_score_sales.csv'
ctx = get_ctx(usr, pwd)
@ -104,7 +107,7 @@ def add_sales(query, usr, pwd, do_clean_token_id = False, data_folder = '/rstudi
old[old.token_id.isnull()].groupby('collection').sale_date.count()
go = old.groupby('collection').token_id.count().reset_index().rename(columns={'token_id':'n_old'})
l0 = len(old)
app = old[old.collection.isin(m.collection.unique())].append(m)
app = pd.concat([old[old.collection.isin(m.collection.unique())], m])
app = app[ app.price > 0 ]
app['tmp'] = app.apply(lambda x: x['collection']+str(int(float(x['token_id'])))+x['sale_date'][:10], 1 )
if len(app[app.tx_id.isnull()]):
@ -115,7 +118,7 @@ def add_sales(query, usr, pwd, do_clean_token_id = False, data_folder = '/rstudi
else:
app = app.drop_duplicates(subset=['tx_id'])
old = old[-old.collection.isin(app.collection.unique())]
old = old.append(app)
old = pd.concat([old, app])
old = old[[ 'collection','token_id','sale_date','price','tx_id' ]]
@ -142,22 +145,43 @@ def add_solana_sales(usr, pwd, data_folder = '/rstudio-data/'):
, sales_amount AS price
FROM solana.fact_nft_sales s
JOIN solana.dim_nft_metadata m ON LOWER(m.mint) = LOWER(s.mint)
WHERE block_timestamp >= CURRENT_DATE - 14
WHERE block_timestamp >= CURRENT_DATE - 7
AND m.project_name IN (
'Astrals',
'Aurory',
'Cets On Creck',
'Blocksmith Labs',
'Bohemia',
'Bot Head',
'Bubblegoose Ballers',
'Cat Cartel',
'Catalina Whale Mixer',
'Cets On Creck',
'Citizens by Solsteads',
'Communi3: Mad Scientists',
'DeFi Pirates',
'DeFi Pirates',
'DeGods',
'Degen Apes',
'Degen Dojo',
'Doge Capital',
'Famous Fox Federation',
'GGSG: Galactic Geckos',
'Just Ape.',
'Looties',
'Meerkat Millionaires',
'Monkey Baby Business',
'Okay Bears',
'Pesky Penguins',
'Primates',
'Quantum Traders',
'SOLGods',
'SolStein',
'Solana Monke Rejects',
'Solana Monkey Business',
'Solanauts',
'Stoned Ape Crew',
'Thugbirdz'
'Thugbirdz',
'Trippin Ape Tribe'
)
'''
add_sales(query, usr, pwd, False, data_folder)
@ -172,10 +196,10 @@ def add_ethereum_sales(usr, pwd, data_folder = '/rstudio-data/'):
, price
, tx_id
FROM ethereum.nft_events
WHERE project_name IN (
'BoredApeYachtClub'
, 'MutantApeYachtClub'
, 'BoredApeKennelClub'
WHERE LOWER(project_name) IN (
'boredapeyachtclub'
, 'mutantapeyachtclub'
, 'boredapekennelclub'
)
AND price IS NOT NULL
AND block_timestamp >= CURRENT_DATE - 14

View File

@ -5,10 +5,10 @@ account: kellen
server: science.flipsidecrypto.xyz
hostUrl: https://science.flipsidecrypto.xyz/__api__
appId: 114
bundleId: 405
bundleId: 434
url: https://science.flipsidecrypto.xyz/content/67141ada-46fd-4750-a690-0be248c461f3/
when: 1654490491.17543
lastSyncTime: 1654490491.17545
when: 1656289651.71961
lastSyncTime: 1656289651.71962
asMultiple: FALSE
asStatic: FALSE
ignoredFiles: scrape_terra_nfts.py|add_sales.py|nft_deal_score_data.RData|nft_deal_score_listings_data.RData|nft_deal_score_sales_data.RData|nft_deal_score_sales.csv|nft_deal_score_listings.csv

View File

@ -5,10 +5,10 @@ account: kellen
server: science.flipsidecrypto.xyz
hostUrl: https://science.flipsidecrypto.xyz/__api__
appId: 93
bundleId: 410
bundleId: 435
url: https://science.flipsidecrypto.xyz/nft-deal-score/
when: 1654526436.70554
lastSyncTime: 1654526436.70555
when: 1656375887.36944
lastSyncTime: 1656375887.36945
asMultiple: FALSE
asStatic: FALSE
ignoredFiles: add_sales.py|data (2).Rdata|data copy 2.Rdata|data copy.Rdata|data.Rdata|exploration.R|loan_score_model.py|nft_deal_score_data.RData|nft_deal_score_listings_data.RData|nft_deal_score_listings.csv|nft_deal_score_sales_data.RData|nft_deal_score_sales.csv|nft_deal_score_tokens.csv|scrape_eth_nfts.py|scrape_terra_nfts.py|update_data.R|update_nft_deal_score_data.RMD|upload_data.R|utils.py

View File

@ -0,0 +1,14 @@
name: update_nft_deal_score_data4
title:
username: kellen
account: kellen
server: science.flipsidecrypto.xyz
hostUrl: https://science.flipsidecrypto.xyz/__api__
appId: 114
bundleId: 430
url: https://science.flipsidecrypto.xyz/content/67141ada-46fd-4750-a690-0be248c461f3/
when: 1656200355.03703
lastSyncTime: 1656200355.03704
asMultiple: FALSE
asStatic: FALSE
ignoredFiles: add_sales.py|data (2).Rdata|data copy 2.Rdata|data copy.Rdata|data.Rdata|exploration.R|loan_score_model.py|nft_deal_score_data.RData|nft_deal_score_listings_data.RData|nft_deal_score_listings.csv|nft_deal_score_sales_data.RData|nft_deal_score_sales.csv|nft_deal_score_tokens.csv|scrape_eth_nfts.py|scrape_terra_nfts.py|update_data.R|update_nft_deal_score_data.RMD|upload_data.R|utils.py

View File

@ -47,6 +47,7 @@ def clean_name(name):
name = re.sub('-', ' ', name)
name = re.sub(' On ', ' on ', name)
name = re.sub('Defi ', 'DeFi ', name)
# name = re.sub(r'[^a-zA-Z0-9\s]', '', name)
return(name)
def scrape_randomearth(data_folder = '/rstudio-data/'):

View File

@ -3,8 +3,10 @@ server <- function(input, output, session) {
user <- Sys.info()[['user']]
# options(warn=-1)
isRstudio <- user != 'kellenblumberg'
# isRstudio <- TRUE
base_dir <- ifelse(
user == 'rstudio-connect'
isRstudio
, '/rstudio-data/'
, ifelse(user == 'fcaster'
, '/srv/shiny-server/nft-deal-score/'
@ -472,7 +474,7 @@ server <- function(input, output, session) {
selectInput(
inputId = 'collectionname'
, label = NULL
, selected = 'Catalina Whale Mixer'
, selected = 'Famous Fox Federation'
, choices = choices
, width = "100%"
)
@ -561,7 +563,7 @@ updateSelectizeInput(session, 'tokenid', choices = choices, server = TRUE)
selected %in% c('Cets on Creck')
, strsplit(selected, ' |s ')[[1]][1]
, ifelse(
selected %in% c('Stoned Ape Crew', 'Catalina Whale Mixer')
selected %in% c('Stoned Ape Crew', 'Catalina Whale Mixer','Famous Fox Federation')
, paste(strsplit(selected, ' ')[[1]][1], strsplit(selected, ' ')[[1]][2], sep = ' ')
, substr(selected, 1, nchar(selected) - 1)
)

View File

@ -30,9 +30,9 @@ fluidPage(
class="hero"
, fluidRow(
class = "header-images",
column(4, uiOutput("solanaimg")),
column(4, uiOutput("terraimg")),
column(4, uiOutput("ethereumimg"))
column(6, uiOutput("solanaimg")),
# column(4, uiOutput("terraimg")),
column(6, uiOutput("ethereumimg"))
)
, h1(
class="header",

View File

@ -3,143 +3,76 @@ library(reticulate)
library(httr)
library(jsonlite)
user <- Sys.info()[['user']]
isRstudio <- user == 'rstudio-connect'
# nft_deal_score_listings_data.RData
.topic = 'prod-nft-metadata-uploads'
.key = 'solana-nft-metadata'
.url = 'https://kafka-rest-proxy.flipside.systems'
user <- Sys.info()[['user']]
isRstudio <- user %in% c('rstudio-connect','data-science')
base_dir <- ifelse(
user == 'rstudio-connect'
isRstudio
, '/rstudio-data/'
, ifelse(user == 'fcaster'
, '/srv/shiny-server/nft-deal-score/'
, '~/git/nft-deal-score/viz/'
)
, '~/git/nft-deal-score/viz/'
)
# base_dir <- '/srv/shiny-server/nft-deal-score/'
listings_file <- paste0(base_dir,'nft_deal_score_listings_data.RData')
load(listings_file)
if(isRstudio) {
source('/home/data-science/data_science/util/util_functions.R')
source_python('/home/data-science/data_science/nft-deal-score/scrape_terra_nfts.py')
source_python('/home/data-science/data_science/viz/nft-deal-score/upload_solana_nft_labels.py')
} else {
source('~/data_science/util/util_functions.R')
source_python(paste0(base_dir, 'scrape_terra_nfts.py'))
source_python(paste0(base_dir, 'upload_solana_nft_labels.py'))
}
# py_install('pandas', pip = TRUE)
# py_install('cloudscraper', pip = TRUE)
# py_install('snowflake-connector-python', pip = TRUE)
# cloudscraper <- import('cloudscraper')
base_dir <- ifelse(
user == 'rstudio-connect'
, '/rstudio-data/'
, ifelse(user == 'fcaster'
, '/srv/shiny-server/nft-deal-score/'
, '~/git/nft-deal-score/viz/'
)
)
source_python(paste0(base_dir, 'scrape_terra_nfts.py'))
source_python(paste0(base_dir, 'add_sales.py'))
query <- '
SELECT DISTINCT project_name AS collection
, mint AS tokenMint
, token_id
FROM solana.dim_nft_metadata
'
mints <- QuerySnowflake(query)
colnames(mints) <- c('collection','tokenMint','token_id')
# pull terra listings
terra_listings <- scrape_randomearth(base_dir)
head(terra_listings)
unique(terra_listings$collection)
#########################
# Load NFT Data #
#########################
mints_from_me()
pull_from_metaboss()
how_rare_is_api()
# saves labels to '/rstudio-data/nft_labels/solana_nft_labels.csv'
compile()
get_me_url <- function(collection, offset) {
return(paste0('https://api-mainnet.magiceden.dev/v2/collections/',collection,'/listings?offset=',offset,'&limit=20'))
}
get_smb_url <- function(page) {
return(paste0('https://market.solanamonkey.business/api/items?limit=40&page=',page))
}
solana_listings <- data.table()
solana_collections <- c(
'okay_bears','the_catalina_whale_mixer','meerkat_millionaires_country_club','solgods','cets_on_creck','stoned_ape_crew','degods','aurory','thugbirdz','solana_monkey_business','degenerate_ape_academy','pesky_penguins'
)
for(collection in solana_collections) {
print(paste0('Working on ', collection, '...'))
has_more <- TRUE
offset <- 0
while(has_more) {
Sys.sleep(1)
print(paste0('Offset #', offset))
url <- get_me_url(collection, offset)
response <- GET(url)
content <- rawToChar(response$content)
content <- fromJSON(content)
if( typeof(content) == 'list' ) {
content <- rbindlist(content, fill=T)
}
has_more <- nrow(content) >= 20
if(nrow(content) > 0 && length(content) > 0) {
df <- merge(content, mints, by=c('tokenMint')) %>% as.data.table()
df <- df[, list(collection, token_id, price)]
offset <- offset + 20
solana_listings <- rbind(solana_listings, df)
} else {
has_more <- FALSE
}
###############################
# Upload NFT Metadata #
###############################
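# each results file holds pre-built JSON payloads; every record is POSTed to the Kafka
# REST proxy under the prod-nft-metadata-uploads topic, wrapped in the
# {"records": [{"key": ..., "value": ...}]} envelope the proxy expects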
files <- list.files(paste0(base_dir, 'nft_labels/metadata/results/'))
it <- 0
for(f in files) {
print(f)
results <- read.csv(paste0(base_dir,'/nft_labels/metadata/results/',f))
for(r in results$results) {
it <- it + 1
print(paste0('#',it))
out <- tryCatch(
{
# s <- readChar(fileName, file.info(fileName)$size)
s <- r
.body <- paste0(
'{"records": [{"key": "',.key,'","value":',s,'}]}',
collapse = ""
)
r <- httr::POST(url = paste(.url,"topics",.topic,sep = "/"),
add_headers('Content-Type' = "application/vnd.kafka.json.v2+json",
'Accept' = "application/vnd.kafka.v2+json, application/vnd.kafka+json, application/json"),
body = .body)
print(r)
},
error=function(cond) {
print(cond)
return(NA)
},
warning=function(cond) {
print(cond)
return(NULL)
},
finally={
}
)
}
}
for(collection in c('Solana Monkey Business')) {
print(paste0('Working on ', collection, '...'))
has_more <- TRUE
page <- 1
while(has_more) {
Sys.sleep(1)
print(paste0('Page #', page))
url <- get_smb_url(page)
response <- GET(url)
content <- rawToChar(response$content)
content <- fromJSON(content)
# content <- rbindlist(content, fill=T)
content <- content %>% as.data.table()
has_more <- nrow(content) > 0 && 'price' %in% colnames(content)
if(has_more) {
content <- content[, list(mint, price)]
content <- unique(content)
content$price <- as.numeric(content$price) / (10^9)
has_more <- nrow(content) >= 40
colnames(content)[1] <- 'tokenMint'
df <- merge(content, mints, by=c('tokenMint')) %>% as.data.table()
df <- df[, list(collection, token_id, price)]
page <- page + 1
solana_listings <- rbind(solana_listings, df)
}
}
}
head(solana_listings)
head(terra_listings)
new_listings <- rbind(solana_listings, terra_listings)
new_listings <- unique(new_listings)
# listings <- read.csv('./data/listings.csv') %>% as.data.table()
rem <- unique(new_listings$collection)
rem
listings <- listings[ !(collection %in% eval(rem)), ]
listings <- listings[, list(collection, token_id, price)]
listings <- rbind(listings, new_listings)
listings <- listings[order(collection, price)]
listings[, token_id := as.integer(token_id)]
save(
listings
, file = listings_file
)

View File

@ -51,16 +51,19 @@ library(reticulate)
# py_install('cloudscraper', pip = TRUE)
# r reticulate python ModuleNotFoundError
# print('54')
use_python('/opt/python/3.10.4/bin/python')
py_install('pandas', pip = TRUE)
py_install('snowflake-connector-python', pip = TRUE)
SD_MULT = 3
SD_SCALE = 1.95
user <- Sys.info()[['user']]
isRstudio <- user == 'rstudio-connect'
# isRstudio <- user == 'rstudio-connect'
isRstudio <- user != 'kellenblumberg'
# isRstudio <- TRUE
if (isRstudio) {
use_python('/opt/python/3.10.4/bin/python')
py_install('pandas', pip = TRUE)
py_install('snowflake-connector-python', pip = TRUE)
}
base_dir <- ifelse(
isRstudio
@ -109,6 +112,7 @@ add_ethereum_sales(usr, pwd, base_dir)
# read sales data from nft_deal_score_sales.csv
raw_sales <- read.csv(paste0(base_dir,'nft_deal_score_sales.csv')) %>% as.data.table()
raw_sales <- raw_sales[order(collection, sale_date, price)]
unique(raw_sales$collection)
# calculate the floor price
raw_sales <- raw_sales %>%
@ -149,7 +153,7 @@ query <- '
'
mints <- QuerySnowflake(query)
colnames(mints) <- c('collection','tokenMint','token_id')
mints[ collection == 'Cets On Creck', collection := 'Cets on Creck']
# mints[ collection == 'Cets On Creck', collection := 'Cets on Creck']
# pull terra listings
# terra_listings <- scrape_randomearth(base_dir)
@ -168,10 +172,75 @@ get_smb_url <- function(page) {
solana_listings <- data.table()
solana_collections <- c(
'okay_bears','the_catalina_whale_mixer','meerkat_millionaires_country_club','solgods','cets_on_creck','stoned_ape_crew','degods','aurory','thugbirdz','solana_monkey_business','degenerate_ape_academy','pesky_penguins'
'famous_fox_federation'
)
solana_collections <- c(
'okay_bears','the_catalina_whale_mixer','meerkat_millionaires_country_club','solgods','cets_on_creck','stoned_ape_crew','degods','aurory','thugbirdz','solana_monkey_business','degenerate_ape_academy','pesky_penguins'
'blocksmith_labs'
, 'dazedducks_metagalactic_club'
, 'degenerate_trash_pandas'
, 'famous_fox_federation'
, 'generous_robots_dao'
, 'ghostface'
, 'ghostface'
, 'ghostface_gen_2'
, 'portals'
, 'smokeheads'
, 'theorcs'
)
solana_collections <- c(
# 'blocksmith_labs'
# , 'dazedducks_metagalactic_club'
# , 'degenerate_trash_pandas'
'famous_fox_federation',
# , 'generous_robots_dao'
# , 'ghostface'
# , 'ghostface_gen_2'
# , 'portals'
# , 'smokeheads'
# , 'theorcs',
# 'astrals',
'aurory',
# 'bohemia_',
# 'bothead',
'bubblegoose_ballers',
# 'cat_cartel',
'cets_on_creck',
# 'citizens_by_solsteads',
# 'communi3',
# 'defi_pirates',
# 'degendojonft',
'degenerate_ape_academy',
# 'degenerate_ape_kindergarten',
'degods',
# 'doge_capital',
# 'galactic_gecko_space_garage',
# 'justape',
# 'looties',
# 'marinadechefs',
'meerkat_millionaires_country_club',
# 'monkey_baby_business',
'okay_bears',
'pesky_penguins',
'portals',
'primates',
# 'psykerhideouts',
# 'quantum_traders',
# 'solana_monke_rejects',
'solana_monkey_business',
# 'solanauts',
'solgods',
# 'solstein',
'stoned_ape_crew',
# 'taiyo_infants_incubators',
'the_catalina_whale_mixer',
# 'the_remnants_',
# 'the_tower',
# 'the_vaultx_dao',
'thugbirdz'
# 'trippin_ape_tribe',
# 'visionary_studios'
)
# headers = c(
# 'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
@ -244,8 +313,6 @@ for(collection in solana_collections) {
}
}
solana_listings[order(token_id)]
for(collection in c('Solana Monkey Business')) {
print(paste0('Working on ', collection, '...'))
has_more <- TRUE
@ -292,6 +359,9 @@ listings <- listings[ !(collection %in% c('LunaBulls','Galactic Punks','Galactic
listings <- listings[!is.na(price)]
listings <- listings %>% as.data.table()
sort(unique(listings$collection))
# write.csv(unique(listings[, collection]), '~/Downloads/tmp.csv', row.names=F)
floors <- listings %>%
group_by(collection) %>%
summarize(cur_floor = min(price)) %>%
@ -312,7 +382,7 @@ get_fmp <- function(data, coefsdf, pred_price) {
fmp[, fair_market_price := pred_price + abs_chg + (pct_chg * pred_price / floor_price) ]
}
if(TRUE) {
if(FALSE) {
coefsdf[, tot := lin_coef + log_coef ]
coefsdf[, lin_coef := lin_coef / tot]
coefsdf[, log_coef := log_coef / tot]
@ -434,7 +504,7 @@ if(TRUE) {
tmp <- tmp[order(-pts)]
content <- tmp[ (price < 0.9 * fair_market_price) , head(.SD, 2), by = collection]
content <- content[order(-pts)]
content <- head(content[order(-pts)], 15)
# content <- paste(c(header, content$label, collapse='\n'))
content <- paste(c(header, content$label), collapse='\n')
@ -459,15 +529,17 @@ if(TRUE) {
colnames(fmp)[3] <- 'rarity_rank'
colnames(fmp)[4] <- 'deal_score_rank'
for( cur_collection in unique(fmp$collection)) {
print(paste0('Working on ',cur_collection, '...'))
data <- fmp[collection == eval(cur_collection)]
KafkaGeneric(
.topic = 'prod-data-science-uploads'
, .url = 'https://kafka-rest-proxy.flipside.systems'
, .project = paste0('nft-deal-score-rankings-', cur_collection)
, .data = data
)
if (FALSE) {
for( cur_collection in unique(fmp$collection)) {
print(paste0('Working on ',cur_collection, '...'))
data <- fmp[collection == eval(cur_collection)]
KafkaGeneric(
.topic = 'prod-data-science-uploads'
, .url = 'https://kafka-rest-proxy.flipside.systems'
, .project = paste0('nft-deal-score-rankings-', cur_collection)
, .data = data
)
}
}
}
@ -480,6 +552,7 @@ KafkaGeneric(
, .data = data
)
sort(unique(listings$collection))
save(
listings

566
viz/update_nft_labels.R Normal file
View File

@ -0,0 +1,566 @@
---
title: "Update NFT Deal Score Data"
author: "Kellen"
date: "2022-04-20"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## Run Updates
Hello, I am running this at `r Sys.time()`
```{r update}
#include all required libraries here
#EVEN IF YOU SOURCE util_functions.R
#YOU HAVE TO PUT THE LIBRARIES HERE I KNOW SORRY
#BUT HERE THEY ALL ARE TO SAVE YOU TIME
# install.packages('RCurl')
library(RCurl)
library(fasttime)
library(gridExtra)
library(ggplot2)
library(data.table)
library(reshape2)
library(dplyr)
library(dbplyr)
library(RJSONIO)
library(magrittr)
library(RJSONIO)
library(xts)
library(quantmod)
library(fTrading)
library(curl)
library(stringr)
library(aws.s3)
library(RPostgres)
library(odbc)
library(httr)
library(jsonlite)
library(reticulate)
#NOW COPY EVERYTHING ELSE FROM YOUR CURRENT
#update_data.R FILE HERE ---------->
# virtualenv_create('pyvenv')
# use_virtualenv('pyvenv')
# virtualenv_install('pyvenv', 'pandas')
# virtualenv_install('pyvenv', 'pandas')
# py_install('cloudscraper', pip = TRUE)
# r reticulate python ModuleNotFoundError
# print('54')
SD_MULT = 3
SD_SCALE = 1.95
user <- Sys.info()[['user']]
# isRstudio <- user == 'rstudio-connect'
isRstudio <- user != 'kellenblumberg'
# isRstudio <- TRUE
if (isRstudio) {
use_python('/opt/python/3.10.4/bin/python')
py_install('pandas', pip = TRUE)
py_install('snowflake-connector-python', pip = TRUE)
}
base_dir <- ifelse(
isRstudio
, '/rstudio-data/'
, ifelse(user == 'fcaster'
, '/srv/shiny-server/nft-deal-score/'
, '~/git/nft-deal-score/viz/'
)
)
if(isRstudio) {
source('/home/data-science/data_science/util/util_functions.R')
source('/home/data-science/data_science/util/kafka_utils.R')
source_python('/home/data-science/data_science/viz/nft-deal-score/add_sales.py')
source_python('~/upload_solana_nft_labels.py')
} else {
source('~/data_science/util/util_functions.R')
source('~/data_science/util/kafka_utils.R')
source_python(paste0(base_dir, 'scrape_terra_nfts.py'))
source_python(paste0(base_dir, 'add_sales.py'))
}
usr <- readLines(file.path(base.path,"data_science/util/snowflake.usr"))
pwd <- readLines(file.path(base.path,"data_science/util/snowflake.pwd"))
load(paste0(base_dir,'nft_deal_score_data.RData'))
listings_file <- paste0(base_dir,'nft_deal_score_listings_data.RData')
sales_file <- paste0(base_dir,'nft_deal_score_sales_data.RData')
load(listings_file)
coefsdf[, tot := lin_coef + log_coef ]
coefsdf[, lin_coef := lin_coef / tot]
coefsdf[, log_coef := log_coef / tot]
sum(coefsdf$log_coef) + sum(coefsdf$lin_coef)
# write sales data to nft_deal_score_sales.csv
add_solana_sales(usr, pwd, base_dir)
add_ethereum_sales(usr, pwd, base_dir)
# add_terra_sales(usr, pwd, base_dir)
# read sales data from nft_deal_score_sales.csv
raw_sales <- read.csv(paste0(base_dir,'nft_deal_score_sales.csv')) %>% as.data.table()
raw_sales <- raw_sales[order(collection, sale_date, price)]
unique(raw_sales$collection)
# calculate the floor price
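# the rolling floor is roughly the 5.75th percentile of the previous 20 sale prices,
# lagged by one sale so a sale never feeds its own floor; early rows with fewer than 20
# prior sales are back-filled from the next available value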
raw_sales <- raw_sales %>%
group_by(collection) %>%
mutate(mn_20=lag(price, 1)) %>%
as.data.table()
raw_sales <- raw_sales %>%
group_by(collection) %>%
mutate(rolling_floor=rollapply(mn_20, width = 20, FUN = "quantile", p = .0575, na.pad = TRUE, align = 'right')) %>%
as.data.table()
raw_sales[, rolling_floor := nafill(rolling_floor, type = "nocb")]
# calculate the fair market price
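# fair market price = model prediction adjusted for how far the rolling floor has moved
# relative to the reference floor_price: a linear shift (abs_chg) plus a proportional
# term (pct_chg) scaled by the token's predicted-price-to-floor ratio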
tmp <- merge( raw_sales[, list(collection, token_id, sale_date, price, tx_id, rolling_floor)], coefsdf, by=c('collection') )
tmp <- merge( tmp, pred_price, by=c('collection','token_id') )
tmp[, abs_chg := (rolling_floor - floor_price) * lin_coef ]
tmp[, pct_chg := (rolling_floor - floor_price) * log_coef ]
tmp[, fair_market_price := pred_price + abs_chg + (pct_chg * pred_price / floor_price) ]
# save to an .RData file
sales <- tmp[, list(collection, token_id, sale_date, price, nft_rank, fair_market_price, rolling_floor)]
colnames(sales) <- c('collection', 'token_id', 'block_timestamp', 'price', 'nft_rank', 'pred', 'mn_20')
save(
sales
, file = sales_file
)
# load the mints
query <- '
SELECT DISTINCT project_name AS collection
, mint AS tokenMint
, token_id
FROM solana.dim_nft_metadata
'
mints <- QuerySnowflake(query)
colnames(mints) <- c('collection','tokenMint','token_id')
# mints[ collection == 'Cets On Creck', collection := 'Cets on Creck']
# pull terra listings
# terra_listings <- scrape_randomearth(base_dir)
# head(terra_listings)
# unique(terra_listings$collection)
# 9c39e05c-db3c-4f3f-ac48-84099111b813
get_me_url <- function(collection, offset) {
return(paste0('https://api-mainnet.magiceden.dev/v2/collections/',collection,'/listings?offset=',offset,'&limit=20'))
}
get_smb_url <- function(page) {
return(paste0('https://market.solanamonkey.business/api/items?limit=40&page=',page))
}
solana_listings <- data.table()
solana_collections <- c(
'famous_fox_federation'
)
solana_collections <- c(
'blocksmith_labs'
, 'dazedducks_metagalactic_club'
, 'degenerate_trash_pandas'
, 'famous_fox_federation'
, 'generous_robots_dao'
, 'ghostface'
, 'ghostface'
, 'ghostface_gen_2'
, 'portals'
, 'smokeheads'
, 'theorcs'
)
solana_collections <- c(
# 'blocksmith_labs'
# , 'dazedducks_metagalactic_club'
# , 'degenerate_trash_pandas'
'famous_fox_federation',
# , 'generous_robots_dao'
# , 'ghostface'
# , 'ghostface_gen_2'
# , 'portals'
# , 'smokeheads'
# , 'theorcs',
# 'astrals',
'aurory',
# 'bohemia_',
# 'bothead',
'bubblegoose_ballers',
# 'cat_cartel',
'cets_on_creck',
# 'citizens_by_solsteads',
# 'communi3',
# 'defi_pirates',
# 'degendojonft',
'degenerate_ape_academy',
# 'degenerate_ape_kindergarten',
'degods',
# 'doge_capital',
# 'galactic_gecko_space_garage',
# 'justape',
# 'looties',
# 'marinadechefs',
'meerkat_millionaires_country_club',
# 'monkey_baby_business',
'okay_bears',
'pesky_penguins',
'portals',
'primates',
# 'psykerhideouts',
# 'quantum_traders',
# 'solana_monke_rejects',
'solana_monkey_business',
# 'solanauts',
'solgods',
# 'solstein',
'stoned_ape_crew',
# 'taiyo_infants_incubators',
'the_catalina_whale_mixer',
# 'the_remnants_',
# 'the_tower',
# 'the_vaultx_dao',
'thugbirdz'
# 'trippin_ape_tribe',
# 'visionary_studios'
)
# headers = c(
# 'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
# )
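# for each collection, walk the Magic Eden listings endpoint 20 at a time; errors and
# warnings are caught so a bad page just advances the offset instead of killing the run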
for(collection in solana_collections) {
print(paste0('Working on ', collection, '...'))
has_more <- TRUE
has_err <- FALSE
offset <- 0
while(has_more) {
Sys.sleep(1)
out <- tryCatch(
{
print(paste0('Offset #', offset))
url <- get_me_url(collection, offset)
response <- GET(
url = url
# , add_headers(.headers = c('Authorization'= 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'))
, add_headers('Authorization'= 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813')
)
# r <- content(response, as = 'parsed')
content <- rawToChar(response$content)
content <- fromJSON(content)
if( !is.data.frame(content) ) {
content <- rbindlist(content, fill=T)
}
has_more <- nrow(content) > 0
if(nrow(content) > 0 && length(content) > 0) {
# content <- data.table(content)
df <- merge(content, mints, by=c('tokenMint')) %>% as.data.table()
# if(nrow(df) > 0) {
# print(min(df$price))
# }
df <- df[, list(collection, token_id, price)]
solana_listings <- rbind(solana_listings, df)
} else {
has_more <- FALSE
}
offset <- offset + 20
has_err <- FALSE
},
error=function(cond) {
print(paste0('Error: ', cond))
return(TRUE)
# has_more <- FALSE
# if(has_err) {
# has_err <- FALSE
# has_more <- FALSE
# return(TRUE)
# } else {
# Sys.sleep(15)
# has_err <- TRUE
# return(FALSE)
# }
# return(TRUE)
},
warning=function(cond) {
print(paste0('Warning: ', cond))
return(TRUE)
},
finally={
# return(TRUE)
# print(paste0('Finally'))
}
)
if(out) {
offset <- offset + 20
# has_more <- FALSE
}
}
}
for(collection in c('Solana Monkey Business')) {
print(paste0('Working on ', collection, '...'))
has_more <- TRUE
page <- 1
while(has_more) {
Sys.sleep(1)
print(paste0('Page #', page))
url <- get_smb_url(page)
response <- GET(url)
content <- rawToChar(response$content)
content <- fromJSON(content)
# content <- rbindlist(content, fill=T)
content <- content %>% as.data.table()
has_more <- nrow(content) > 0 && 'price' %in% colnames(content)
if(has_more) {
content <- content[, list(mint, price)]
content <- unique(content)
content$price <- as.numeric(content$price) / (10^9)
has_more <- nrow(content) >= 40
colnames(content)[1] <- 'tokenMint'
df <- merge(content, mints, by=c('tokenMint')) %>% as.data.table()
df <- df[, list(collection, token_id, price)]
page <- page + 1
solana_listings <- rbind(solana_listings, df)
}
}
}
head(solana_listings)
# head(terra_listings)
# new_listings <- rbind(solana_listings, terra_listings)
new_listings <- unique(solana_listings)
# listings <- read.csv('./data/listings.csv') %>% as.data.table()
rem <- unique(new_listings$collection)
sort(rem)
listings <- listings[ !(collection %in% eval(rem)), ]
listings <- listings[, list(collection, token_id, price)]
listings <- rbind(listings, new_listings)
listings <- listings[order(collection, price)]
listings[, token_id := as.integer(token_id)]
listings <- listings[ !(collection %in% c('LunaBulls','Galactic Punks','Galactic Angels','Levana Dragon Eggs')) ]
listings <- listings[!is.na(price)]
listings <- listings %>% as.data.table()
sort(unique(listings$collection))
# write.csv(unique(listings[, collection]), '~/Downloads/tmp.csv', row.names=F)
floors <- listings %>%
group_by(collection) %>%
summarize(cur_floor = min(price)) %>%
as.data.table()
get_fmp <- function(data, coefsdf, pred_price) {
coefsdf[, tot := lin_coef + log_coef ]
coefsdf[, lin_coef := lin_coef / tot]
coefsdf[, log_coef := log_coef / tot]
sum(coefsdf$log_coef) + sum(coefsdf$lin_coef)
fmp <- merge( pred_price, coefsdf, by=c('collection') )
fmp <- merge( fmp, data[, list(token_id, collection, block_timestamp, price, mn_20)], by=c('token_id','collection') )
# fmp <- merge( fmp, floors, by=c('collection') )
fmp[, abs_chg := (mn_20 - floor_price) * lin_coef ]
fmp[, pct_chg := (mn_20 - floor_price) * log_coef ]
fmp[, fair_market_price := pred_price + abs_chg + (pct_chg * pred_price / floor_price) ]
}
if(FALSE) {
coefsdf[, tot := lin_coef + log_coef ]
coefsdf[, lin_coef := lin_coef / tot]
coefsdf[, log_coef := log_coef / tot]
sum(coefsdf$log_coef) + sum(coefsdf$lin_coef)
fmp <- merge( pred_price, coefsdf, by=c('collection') )
fmp <- merge( fmp, floors, by=c('collection') )
fmp[, abs_chg := (cur_floor - floor_price) * lin_coef ]
fmp[, pct_chg := (cur_floor - floor_price) * log_coef ]
fmp[, fair_market_price := pred_price + abs_chg + (pct_chg * pred_price / floor_price) ]
mn <- fmp %>% group_by(collection, cur_floor) %>% summarize(mn = min(fair_market_price)) %>% as.data.table()
mn[, ratio := cur_floor / mn]
fmp <- merge(fmp, mn[, list(collection, ratio)])
fmp[ratio < 1, fair_market_price := fair_market_price * ratio ]
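# scale the prediction sd by how far the floor has moved, take the 20th/80th percentiles
# of a normal centred on the fair market price as a suggested price band, and keep both
# bounds from dropping below roughly the current floor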
fmp[, cur_sd := pred_sd * (cur_floor / floor_price) * SD_SCALE ]
fmp[, price_low := qnorm(.2, fair_market_price, cur_sd) ]
fmp[, price_high := qnorm(.8, fair_market_price, cur_sd) ]
fmp[, price_low := pmax(price_low, cur_floor * 0.975) ]
fmp[, price_high := pmax(price_high, cur_floor * 1.025) ]
fmp[, price_low := round(price_low, 2) ]
fmp[, price_high := round(price_high, 2) ]
fmp[, fair_market_price := pmax(cur_floor, fair_market_price) ]
fmp[, fair_market_price := round(fair_market_price, 2) ]
fmp[, cur_sd := round(cur_sd, 2) ]
head(fmp[collection == 'SOLGods'][order(fair_market_price)])
head(fmp[(collection == 'SOLGods') & (rk <= 4654)][order(fair_market_price)])
head(fmp[(collection == 'SOLGods') & (rk == 4654)][order(fair_market_price)])
tmp <- merge(listings, fmp, by = c('collection','token_id')) %>% as.data.table()
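# deal score: treat fair_market_price as the mean of a normal with sd cur_sd and score a
# listing as 100 * P(fair value > listed price), so cheaper-than-fair listings score
# closer to 100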
tmp[, deal_score := pnorm(price, fair_market_price, cur_sd) ]
tmp[, deal_score := 100 * (1 - deal_score) ]
tmp[, vs_floor := (price / cur_floor) - 1 ]
tmp[, vs_floor_grp := ifelse(vs_floor < .1, '<10%', ifelse(vs_floor < .25, '<25%', '>25%')) ]
tmp[, vs_floor := (price - cur_floor) ]
tmp <- tmp[ !(collection %in% c('Levana Dragon Eggs','Galactic Punks','LunaBulls','Galactic Angels','MAYC')) ]
t2 <- tmp[order(-deal_score),.SD[2], list(vs_floor_grp, collection)] %>% as.data.table()
t2 <- t2[, list(collection, vs_floor_grp, deal_score)][order(collection, vs_floor_grp)]
t3 <- tmp[order(-deal_score),.SD[3], list(vs_floor_grp, collection)] %>% as.data.table()
t3 <- t3[, list(collection, vs_floor_grp, deal_score)][order(collection, vs_floor_grp)]
colnames(t2) <- c('collection','vs_floor_grp','deal_score_g2')
colnames(t3) <- c('collection','vs_floor_grp','deal_score_g3')
tmp <- merge(tmp, t2, by=c('collection','vs_floor_grp'))
tmp <- merge(tmp, t3, by=c('collection','vs_floor_grp'))
t2 <- tmp[order(-deal_score),.SD[2], list(collection)] %>% as.data.table()
t2 <- t2[, list(collection, deal_score)][order(collection)]
t3 <- tmp[order(-deal_score),.SD[3], list(collection)] %>% as.data.table()
t3 <- t3[, list(collection, deal_score)][order(collection)]
colnames(t2) <- c('collection','deal_score_2')
colnames(t3) <- c('collection','deal_score_3')
tmp <- merge(tmp, t2, by=c('collection'))
tmp <- merge(tmp, t3, by=c('collection'))
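# pts blends how far this listing's deal score beats the 2nd/3rd best deals (overall and
# within its vs-floor bucket), down-weighted the further the price sits above the current
# floor, plus a bonus for how close the price is to the floor relative to fair market price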
tmp[, pts := (deal_score * 5 - deal_score_g2 - deal_score_g3 - deal_score_2 - deal_score_3) * ((cur_floor / price)**0.75) + (100 * (1 - (( price - cur_floor ) / (fair_market_price - cur_floor)))) ]
url <- 'https://discord.com/api/webhooks/976332557996150826/8KZqD0ov5OSj1w4PjjLWJtmgnCM9bPWaCkZUUEDMeC27Z0iqiA-ZU5U__rYU9tQI_ijA'
unique(tmp$collection)
for(col in c('price','pred_price','fair_market_price','vs_floor','deal_score','deal_score_2','deal_score_3','pts')) {
if(!'price' %in% col) {
tmp[, eval(col) := round(get(col)) ]
} else {
tmp[, eval(col) := ifelse(
get(col) < 10
, round(get(col), 2)
, ifelse(
get(col) < 100
, round(get(col), 1)
, round(get(col)))
)
]
}
}
tmp <- tmp[order(-pts)]
head(tmp[, list(collection, token_id, price, nft_rank, rk, pred_price, cur_floor, fair_market_price, deal_score, deal_score_2, deal_score_3, pts)], 20)
head(tmp[, list(collection, token_id, price, nft_rank, rk, pred_price, cur_floor, fair_market_price, deal_score, deal_score_2, deal_score_3, pts)], 20)
paste(head(tmp$label), collapse='\n')
tmp[, l := nchar(collection)]
mx <- max(tmp$l)
# tmp$clean_collection <- str_pad(collection, eval(mx) - l, side = 'right', pad = '-') ]
tmp$n_pad <- mx - tmp$l
tmp$clean_collection <- str_pad(tmp$collection, mx - tmp$l, side = 'right', pad = '-')
tmp[, clean_collection := str_pad(collection, eval(mx), pad='-', side='right')]
tmp[, clean_collection := str_pad(collection, eval(mx), pad='-', side='both')]
tmp$clean_collection <- str_pad(tmp$collection, mx, pad='-')
tmp[, label := paste(clean_collection, str_pad(token_id, 4, side='left'), price, fair_market_price, deal_score, sep='\t')]
tmp[, label := paste(
clean_collection
, str_pad(token_id, 4, side='left')
, str_pad(rk, 4, side='left')
, str_pad(price, 4, side='left')
, str_pad(vs_floor, 5, side='left')
, str_pad(fair_market_price, 4, side='left')
, str_pad(deal_score, 2, side='left')
, str_pad(deal_score_2, 2, side='left')
, str_pad(deal_score_3, 2, side='left')
, str_pad(pts, 3, side='left')
, sep='\t')
]
header <- paste(
str_pad('collection', mx, side='both', pad='-')
, str_pad('id', 4, side='left')
, str_pad('rk', 4, side='left')
, str_pad('$', 3, side='left')
, str_pad('floor', 5, side='left')
, str_pad('fmp', 3, side='left')
, str_pad('ds', 2, side='left')
, str_pad('ds2', 2, side='left')
, str_pad('ds3', 2, side='left')
, str_pad('pts', 3, side='left')
, sep='\t')
tmp <- tmp[order(-pts)]
content <- tmp[ (price < 0.9 * fair_market_price) , head(.SD, 2), by = collection]
content <- head(content[order(-pts)], 15)
# content <- paste(c(header, content$label, collapse='\n'))
content <- paste(c(header, content$label), collapse='\n')
# content <- paste(c(header, head(tmp$label, 10)), collapse='\n')
data <- list(
content = paste0('```',content,'```')
)
res <- POST(url, body = data, encode = "form", verbose())
# tmp <- tmp[order(-deal_score)]
# head(tmp)
# plot_data[, deal_score := round(100 * (1 - y))]
# y <- pnorm(x, mu, sd)
# tmp[, deal_score := ((fair_market_price / price) - 1) ]
# tmp[, deal_score := ((fair_market_price / price) - 0) ]
# tmp <- tmp[order(-deal_score)]
# tmp <- tmp[, list(collection, token_id, fair_market_price, price, deal_score)]
# tmp[, .SD[1:3], collection]
# fmp <- fmp[, list(collection, token_id, nft_rank, rk, fair_market_price, price_low, price_high)]
fmp <- fmp[, list(collection, token_id, nft_rank, rk, fair_market_price, cur_floor, cur_sd, lin_coef, log_coef)]
colnames(fmp)[3] <- 'rarity_rank'
colnames(fmp)[4] <- 'deal_score_rank'
if (FALSE) {
for( cur_collection in unique(fmp$collection)) {
print(paste0('Working on ',cur_collection, '...'))
data <- fmp[collection == eval(cur_collection)]
KafkaGeneric(
.topic = 'prod-data-science-uploads'
, .url = 'https://kafka-rest-proxy.flipside.systems'
, .project = paste0('nft-deal-score-rankings-', cur_collection)
, .data = data
)
}
}
}
# write the floor prices to snowflake
data <- floors
KafkaGeneric(
.topic = 'prod-data-science-uploads'
, .url = 'https://kafka-rest-proxy.flipside.systems'
, .project = 'nft-deal-score-floors'
, .data = data
)
sort(unique(listings$collection))
save(
listings
, file = listings_file
)
if(!isRstudio) {
write.csv(listings, paste0(base_dir, 'nft_deal_score_listings.csv'))
}
```
Done updating at `r Sys.time()`
The end. Byeeeee.

View File

@ -0,0 +1,655 @@
import re
import os
import json
import time
# import math
import requests
import pandas as pd
# import urllib.request
import snowflake.connector
from time import sleep
# from solana_model import just_float
# from utils import clean_name, clean_token_id, format_num, merge
############################
# Define Constants #
############################
BASE_PATH = '/home/data-science'
DATA_FOLDER = '/rstudio-data/nft_labels'
RPC = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
##############################
# Load DB Connection #
##############################
with open('{}/data_science/util/snowflake.pwd'.format(BASE_PATH), 'r') as f:
pwd = f.readlines()[0].strip()
with open('{}/data_science/util/snowflake.usr'.format(BASE_PATH), 'r') as f:
usr = f.readlines()[0].strip()
ctx = snowflake.connector.connect(
user=usr,
password=pwd,
account='vna27887.us-east-1'
)
############################
# Helper Functions #
############################
def read_csv(data_folder, fname):
return(pd.read_csv('{}/{}.csv'.format(data_folder, fname)))
def write_csv(data_folder, fname, df, verbose = True):
df.to_csv('{}/{}.csv'.format(data_folder, fname), index=False)
if verbose:
print('Wrote {} rows to {}'.format(len(df), fname))
def clean_colnames(df):
names = [ x.lower() for x in df.columns ]
df.columns = names
return(df)
def clean_collection_name(x):
x = re.sub('\|', '-', x).strip()
x = re.sub('\)', '', x).strip()
x = re.sub('\(', '', x).strip()
x = re.sub('\'', '', x).strip()
return(x)
def merge(left, right, on=None, how='inner', ensure=True, verbose=True):
df = left.merge(right, on=on, how=how)
if len(df) != len(left) and (ensure or verbose):
print('{} -> {}'.format(len(left), len(df)))
cur = left.merge(right, on=on, how='left')
cols = set(right.columns).difference(set(left.columns))
print(cols)
col = list(cols)[0]
missing = cur[cur[col].isnull()]
print(missing.head())
if ensure:
assert(False)
return(df)
def Convert(tup, di):
di = dict(tup)
return di
####################################
# Metadata From HowRare.Is #
####################################
def how_rare_is_api():
query = '''
SELECT DISTINCT LOWER(project_name) AS lower_collection
FROM solana.core.dim_nft_metadata
'''
df = ctx.cursor().execute(query)
df = pd.DataFrame.from_records(iter(df), columns=[x[0] for x in df.description])
url = 'https://api.howrare.is/v0.1/collections'
r = requests.get(url)
j = r.json()
c_df = pd.DataFrame(j['result']['data']).sort_values('floor_marketcap', ascending=0)
c_df['lower_collection'] = c_df.url.apply(lambda x: x.lower().strip() )
seen = sorted(df.LOWER_COLLECTION.apply(lambda x: re.sub(' |_|\'', '', x) ).values)
c_df['seen_1'] = c_df.url.apply(lambda x: re.sub(' |_|\'', '', x[1:]).lower() in seen ).astype(int)
c_df['seen_2'] = c_df.name.apply(lambda x: re.sub(' |_|\'', '', x).lower() in seen ).astype(int)
c_df['seen'] = (c_df.seen_1 + c_df.seen_2 > 0).astype(int)
seen = seen + [ 'smb','aurory','degenapes','thugbirdz','degods','okay_bears','catalinawhalemixer','cetsoncreck','stonedapecrew','solgods' ]
c_df = c_df[-(c_df.url.isin([ '/'+x for x in seen]))]
c_df = c_df[c_df.seen == 0]
it = 0
tot = len(c_df)
m_data = []
print('Pulling metadata for {} collections'.format(tot))
for row in c_df.iterrows():
it += 1
row = row[1]
collection = row['name']
print('#{} / {}: {}'.format(it, tot, collection))
url = row['url'][1:]
if it > 1:
assert(len(m_data))
url = 'https://api.howrare.is/v0.1/collections/'+url
r = requests.get(url)
j = r.json()
n_errors = 0
for i in j['result']['data']['items']:
try:
token_id = int(i['id'])
mint = i['mint']
image = i['image']
for d in i['attributes']:
d['token_id'] = token_id
d['collection'] = collection
d['mint_address'] = mint
d['image_url'] = image
m_data += [ d ]
except:
# print('Error')
n_errors += 1
pass
if n_errors:
print('{} errors'.format(n_errors))
metadata = pd.DataFrame(m_data).rename(columns={'name':'feature_name', 'value':'feature_value'})
write_csv(DATA_FOLDER, 'howrare_labels', metadata[['collection','mint_address']])
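# pivot the long attribute rows into one {feature_name: feature_value} dict per token so
# each row matches the token_metadata shape expected downstream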
a = metadata.groupby(['collection','mint_address','token_id','image_url'])[[ 'feature_name','feature_value' ]].apply(lambda g: Convert(list(map(tuple, g.values.tolist())), {}) ).reset_index()
a.columns = ['collection','mint_address','token_id','image_url', 'token_metadata']
a['commission_rate'] = None
a['contract_address'] = a.mint_address
a['contract_name'] = a.collection
a['created_at_block_id'] = 0
a['created_at_timestamp'] = '2021-01-01'
a['created_at_tx_id'] = ''
a['creator_address'] = a.mint_address
a['creator_name'] = a.collection
a['project_name'] = a.collection
a['token_metadata_uri'] = a.image_url
a['token_name'] = a.collection
a['n'] = range(len(a))
a['n'] = a.n.apply(lambda x: int(x/50) )
a['token_id'] = a.token_id.astype(int)
# remove existing files
fnames = os.listdir(DATA_FOLDER+'/metadata/results/')
print('fnames')
print(fnames)
for f in fnames:
os.remove(DATA_FOLDER+'/metadata/results/'+f)
# write new metadata incrementally to upload to solana.core.dim_nft_metadata
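# chunk the rows into files of 100k tokens; within each file the records are grouped 50
# at a time into JSON payloads targeting the {database_name}.silver.nft_metadata sink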
n = 100000
tot = int(len(a) / n) + 1
for i in range(0, len(a), n):
ind = int(i/n)
print('#{} / {}'.format(ind, tot))
g = a.head(i+n).tail(n).to_dict('records')
txt = [
{
"model": {
"blockchain": "solana",
"sinks": [
{
"destination": "{database_name}.silver.nft_metadata",
"type": "snowflake",
"unique_key": "blockchain || contract_address || token_id"
}
],
},
"results": g[x:x+50]
}
for x in range(0, len(g), 50)
]
w = pd.DataFrame({'ind': range(len(txt)), 'results':[json.dumps(x) for x in txt] })
write_csv( DATA_FOLDER, 'metadata/results/{}'.format(ind), w )
return
#################################
# Load Data From ME API #
#################################
def mints_from_me():
##################################
# Get All ME Collections #
##################################
headers = {
# 'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
}
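# page through the Magic Eden collections endpoint 500 at a time until an empty page
# comes back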
data = []
has_more = 1
offset = 0
while has_more:
sleep(1)
print(offset)
url = 'https://api-mainnet.magiceden.dev/v2/collections?offset={}&limit=500'.format(offset)
r = requests.get(url)
j = r.json()
data = data + j
has_more = len(j)
offset += 500
df = pd.DataFrame(data)
write_csv(DATA_FOLDER, 'me_collections', df)
# df.to_csv('{}/me_collections.csv'.format(DATA_FOLDER), index=False)
df = read_csv(DATA_FOLDER, 'me_collections')
# df = pd.read_csv('./data/me_collections.csv')
###########################################
# Get 1 Mint From Each Collection #
###########################################
it = 0
l_data = []
# old_l_df = pd.read_csv('./data/me_mints.csv')
old_l_df = read_csv(DATA_FOLDER, 'me_mints')
seen = list(old_l_df.symbol.unique())
print('We\'ve already seen {} / {} mints from ME'.format(len(seen), len(df)))
df = df[ -df.symbol.isin(seen) ]
df = df[ (df.symbol.notnull()) & (df.symbol != '') ]
df = df.sort_values('symbol')
tot = len(df)
start = time.time()
for row in df.iterrows():
sleep(0.5)
it += 1
row = row[1]
# print('Listings on {}...'.format(row['symbol']))
url = 'https://api-mainnet.magiceden.dev/v2/collections/{}/activities?offset=0&limit=1'.format(row['symbol'])
if row['symbol'] in seen:
print('Seen')
continue
try:
r = requests.get(url, headers=headers)
j = r.json()
except:
try:
print('Re-trying in 10s')
sleep(10)
r = requests.get(url, headers=headers)
j = r.json()
except:
try:
print('Re-trying in 60s')
sleep(60)
r = requests.get(url, headers=headers)
j = r.json()
except:
print('Re-trying in 60s (again!)')
sleep(60)
r = requests.get(url, headers=headers)
j = r.json()
if len(j):
l_data += [[ row['symbol'], row['name'], j[0]['tokenMint'] ]]
if it == 1 or it % 10 == 0:
print('#{} / {} ({} records in {} secs)'.format(it, tot, len(l_data), round(time.time() - start)))
# l_df = pd.DataFrame(l_data, columns=['symbol','name','mint_address'])
# l_df.to_csv('./data/me_mints.csv', index=False)
l_df = pd.DataFrame(l_data, columns=['symbol','name','mint_address'])
l_df = pd.concat([l_df, old_l_df]).drop_duplicates(subset=['symbol'])
print('Adding {} rows to me_mints'.format(len(l_df) - len(old_l_df)))
# l_df.to_csv('./data/me_mints.csv', index=False)
write_csv(DATA_FOLDER, 'me_mints', l_df)
######################################################
# Get Update Authorities For All Collections #
######################################################
# l_df = pd.read_csv('./data/me_mints.csv')
# m_old = pd.read_csv('./data/me_update_authorities.csv')
m_old = read_csv(DATA_FOLDER, 'me_update_authorities')
m_old['seen'] = 1
m_data = list(m_old[['symbol','name','update_authority','seen']].values)
seen = [ x[0] for x in m_data ]
print('Seen {} m_data'.format(len(seen)))
l_df = l_df[-l_df.symbol.isin(seen)]
l_df = l_df.sort_values('symbol')
it = 0
for row in l_df.iterrows():
sleep(.5)
it += 1
row = row[1]
symbol = row['symbol']
print('Working on {}...'.format(symbol))
if symbol in seen:
print('Seen')
continue
url = 'https://api-mainnet.magiceden.dev/v2/tokens/{}'.format(row['mint_address'])
try:
r = requests.get(url, headers=headers)
j = r.json()
except:
print('Re-trying in 10s')
sleep(10)
try:
r = requests.get(url, headers=headers)
j = r.json()
except:
print('Re-trying in 60s')
sleep(60)
r = requests.get(url, headers=headers)
j = r.json()
if 'updateAuthority' in j.keys():
m_data += [[ row['symbol'], row['name'], j['updateAuthority'], 0 ]]
if it % 10 == 0:
print('it#{}: {}'.format(it, len(m_data)))
# m_df = pd.DataFrame(m_data, columns=['symbol','name','update_authority'])
# m_df.to_csv('./data/me_update_authorities.csv', index=False)
m_df = pd.DataFrame(m_data, columns=['symbol','name','update_authority','seen'])
m_df = m_df.drop_duplicates()
print('Adding {} rows to me_update_authorities'.format(len(m_df) - len(m_old)))
write_csv(DATA_FOLDER, 'me_update_authorities', m_df)
# m_df.to_csv('./data/me_update_authorities.csv', index=False)
def pull_from_metaboss():
######################################################
# Get Update Authorities For All Collections #
######################################################
# m_df = pd.read_csv('./data/me_update_authorities.csv')
m_df = read_csv(DATA_FOLDER, 'me_update_authorities')
n_auth = m_df.groupby('update_authority').name.count().reset_index().rename(columns={'name':'n_auth'})
m_df = m_df.merge(n_auth)
l1 = len(m_df[ (m_df.seen == 0) & (m_df.n_auth == 1)])
l2 = len(m_df[ (m_df.seen == 0) & (m_df.n_auth > 1)])
print('{} collections whose update_authority maps to exactly 1 collection; {} whose update_authority maps to 2+'.format(l1, l2))
need = list(m_df[ (m_df.seen == 0) & (m_df.n_auth == 1) ].update_authority.unique())
need = m_df[m_df.update_authority.isin(need)]
# l_df = pd.read_csv('./data/me_mints.csv')
l_df = read_csv(DATA_FOLDER, 'me_mints')
fix = need.merge(l_df[[ 'name','mint_address' ]])
need = fix.copy().rename(columns={'name':'collection'})
# need = need.drop_duplicates(subset=['update_authority']).sort_values('collection').head(7).tail(1)
need['collection'] = need.collection.apply(lambda x: clean_collection_name(x) )
need = need.drop_duplicates(subset=['update_authority']).sort_values('collection')
# need = need.head(2)
mfiles = ['/data/mints/{}/{}_mint_accounts.json'.format(re.sub(' |-', '_', collection), update_authority) for collection, update_authority in zip(need.collection.values, need.update_authority.values) ]
seen = [ x for x in mfiles if os.path.exists(x) ]
seen = []  # override the file check above: re-pull every update authority even if a mint file already exists
# for update authorities that have only 1 collection, we can just check metaboss once
mfolder = '{}/mints/'.format(DATA_FOLDER)
it = 0
tot = len(need)
for row in need.iterrows():
it += 1
row = row[1]
collection = row['collection']
print('#{} / {}: {}'.format(it, tot, collection))
# if collection in seen:
# continue
update_authority = row['update_authority']
# print('Working on {}...'.format(collection))
collection_dir = re.sub(' |-', '_', collection)
dir = '{}{}/'.format(mfolder, collection_dir)
mfile = '{}{}_mint_accounts.json'.format(dir, update_authority)
if not os.path.exists(dir):
print(collection)
os.makedirs(dir)
# elif len(os.listdir(dir)) and os.path.exists(mfile):
# print('Already have {}.'.format(collection))
# print('Seen')
# continue
seen.append(update_authority)
os.system('metaboss -r {} -T 300 snapshot mints --update-authority {} --output {}'.format(RPC, update_authority, dir))
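# Illustrative only (hypothetical authority and paths): the rendered command is roughly
#   metaboss -r <RPC_URL> -T 300 snapshot mints --update-authority <AUTHORITY_PUBKEY> --output ./data/mints/Some_Collection/
# which writes <AUTHORITY_PUBKEY>_mint_accounts.json (a JSON array of mint addresses)
# into the output directory; the csv-writing step below reads those files back.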
# write the mints to csv
data = []
for path in os.listdir(mfolder):
if os.path.isdir('{}{}'.format(mfolder, path)):
collection = re.sub('_', ' ', path).strip()
for fname in os.listdir(mfolder+path):
f = mfolder+path+'/'+fname
if os.path.isfile(f) and '.json' in f:
with open(f) as file:
j = json.load(file)
for m in j:
data += [[ collection, m ]]
df = pd.DataFrame(data, columns=['collection','mint_address'])
df.collection.unique()
write_csv(DATA_FOLDER, 'single_update_auth_labels', df)
# df.to_csv('./data/single_update_auth_labels.csv', index=False)
################################
# Multiple Authorities #
################################
need = list(m_df[ (m_df.seen == 0) & (m_df.n_auth > 1) ].update_authority.unique())
need = m_df[m_df.update_authority.isin(need)]
fix = need.merge(l_df[[ 'name','mint_address' ]])
need = fix.copy().rename(columns={'name':'collection'})
need['collection'] = need.collection.apply(lambda x: clean_collection_name(x) )
need = need.sort_values('collection').drop_duplicates(subset=['update_authority'], keep='first')
# need = need.head(2)
it = 0
a = []
for row in need.iterrows():
it += 1
print('#{}/{}'.format(it, len(need)))
row = row[1]
collection = row['collection']
update_authority = row['update_authority']
print('Working on {}...'.format(collection))
collection_dir = re.sub(' |-', '_', collection)
dir = '{}{}/'.format(mfolder, collection_dir)
mfile = '{}{}_mint_accounts.json'.format(dir, update_authority)
if not os.path.exists(dir):
print(collection)
os.makedirs(dir)
a.append(update_authority)
os.system('metaboss -r {} -T 300 snapshot mints --update-authority {} --output {}'.format(RPC, update_authority, dir))
odir = dir+'output/'
if not os.path.exists(odir):
print('Making dir {}'.format(odir))
os.makedirs(odir)
os.system('metaboss -r {} -T 300 decode mint --list-file {} --output {}'.format(RPC, mfile, odir ))
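# Sketch of the two-step flow (values hypothetical): 'snapshot mints' collects the mint list
# for the update authority, then 'decode mint --list-file' decodes each mint's on-chain
# metadata into one <mint_address>.json under output/; the loader below only relies on the
# 'name', 'symbol' and 'uri' fields of those decoded files.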
##################################################
# Load All The Mints for Each Collection #
##################################################
# now that we have the mints, create a data frame with the info for each mint in each collection
mfolder = '{}/mints/'.format(DATA_FOLDER)
data = []
seen = [ x[1] for x in data ]
it = 0
dirs = sorted(os.listdir(mfolder))
dirs = [ x for x in dirs if not x in ['3D_Sniping_Demons']]
tot = len(dirs)
for path in dirs:
print('{} / {} ({} records)'.format(it, tot, len(data)))
it += 1
if os.path.isdir(mfolder+path):
collection = re.sub('_', ' ', path).strip()
print('Found {}'.format(collection))
if not os.path.exists(mfolder+path+'/output/'):
print('No output')
continue
fnames = os.listdir(mfolder+path+'/output/')
print('{} files found'.format(len(fnames)))
for fname in fnames:
f = mfolder+path+'/output/'+fname
if fname[:-5] in seen:
continue
if os.path.isfile(f) and '.json' in f:
try:
with open(f) as file:
j = json.load(file)
data += [[ collection, fname, j['name'], j['symbol'], j['uri'] ]]
except:
print('Error {}'.format(fname[:-5]))
##################################################
#    Map Each Mint To Its Actual Collection      #
##################################################
new_mints = pd.DataFrame(data, columns=['collection','mint_address','name','symbol','uri'])
# tmp = tmp[-(tmp.collection.isin(['Dskullys','Decimusdynamics']))]
n = len(new_mints[(new_mints.uri.isnull()) | (new_mints.uri == '')])
tot = len(new_mints)
pct = round(n * 100 / tot, 1)
print('{} ({}%) rows have no uri'.format(n, pct))
new_mints = new_mints[new_mints.uri != '']
# function to clean the name of each NFT (remove the number)
def f_cn(x):
if not x or x != x:
return(x)
if '#' in x[-6:]:
x = ''.join(re.split('#', x)[:-1]).strip()
elif bool(re.match('.+\s+[0-9]+', x)):
x = ' '.join(re.split(' ', x)[:-1]).strip()
return(x)
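# Illustrative behaviour (hypothetical names):
#   f_cn('Some Collection #1234') -> 'Some Collection'
#   f_cn('Some Collection 999')   -> 'Some Collection'
#   f_cn(None)                    -> None   (None/NaN/empty values are returned unchanged)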
new_mints['clean_name'] = new_mints.name.apply(lambda x: f_cn(x) )
# determine for each collection whether we should group by collection-name-symbol, collection-symbol, or just collection when figuring out which collection each mint actually belongs to
# e.g. some collections have only a few distinct names, so we can iterate over each name, while others give every NFT a unique name, in which case we assume all mints under the update authority belong to the same collection
a = new_mints.drop_duplicates(subset=['collection','clean_name','symbol']).groupby(['collection']).uri.count().reset_index().sort_values('uri', ascending=0)
symbol_only = a[a.uri > 10].collection.unique()
b = new_mints[new_mints.collection.isin(symbol_only)].drop_duplicates(subset=['collection','symbol']).groupby(['collection']).uri.count().reset_index().sort_values('uri', ascending=0)
collection_only = b[b.uri > 10].collection.unique()
symbol_only = [x for x in symbol_only if not x in collection_only]
# now get the info for each collection-name-symbol combo
g1 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].groupby(['collection','clean_name','symbol']).head(1).reset_index()
g2 = new_mints[ ((new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].groupby(['collection','symbol']).head(1).reset_index()
g3 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & ((new_mints.collection.isin(collection_only))) ].groupby(['collection']).head(1).reset_index()
g = pd.concat([g1, g2, g3]).drop_duplicates(subset=['mint_address'])
print('{} Total: {} collection-name-symbol, {} collection-symbol, {} collection-only'.format(len(g), len(g1), len(g2), len(g3)))
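# Worked example of the cutoff (hypothetical collection): if every NFT has a unique,
# non-numbered name, the collection has >10 distinct (clean_name, symbol) combinations and
# falls back to grouping by symbol only; if it also spans >10 distinct symbols, the whole
# update-authority folder is treated as one collection and a single uri is fetched for it.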
# g.to_csv('~/Downloads/tmp-g.csv', index=False)
# iterate over each row to determine which collection it actually belongs to
# by pulling metadata from the uri
uri_data = []
it = 0
tot = len(g)
print(tot)
errs = []
seen = [ x['uri'] for x in uri_data ]
# for row in g[ -(g.uri.isin(seen)) ].iterrows():
for row in g.iterrows():
row = row[1]
it += 1
# if it % 100 == 0:
# uri_df = pd.DataFrame(uri_data)[[ 'collection','name','symbol','row_symbol','row_collection','uri','row_clean_name','mint_address' ]]
# uri_df.to_csv('~/Downloads/uri_df.csv', index=False)
print('#{} / {}: {}'.format(it, tot, row['collection']))
try:
r = requests.get(row['uri'])
j = r.json()
j['uri'] = row['uri']
j['row_collection'] = row['collection']
j['row_clean_name'] = row['clean_name']
j['row_symbol'] = row['symbol']
j['mint_address'] = row['mint_address']
uri_data += [j]
except:
print('Error')
errs.append(row)
uri_df = pd.DataFrame(uri_data)[[ 'collection','name','symbol','row_symbol','row_collection','uri','row_clean_name','mint_address' ]]
write_csv(DATA_FOLDER, 'uri_df', uri_df)
# uri_df.to_csv('~/Downloads/uri_df.csv', index=False)
# for each row, parse the json from the uri
# uri_df = pd.read_csv('~/Downloads/uri_df.csv')
# read_csv(DATA_FOLDER, 'uri_df')
def f(x, c):
x = str(x)
try:
n = json.loads(re.sub("'", "\"", x))[c]
if type(n) == list:
return(n[0])
return(n)
except:
try:
return(json.loads(re.sub("'", "\"", x))[c])
except:
try:
return(json.loads(re.sub("'", "\"", x))[0][c])
except:
try:
return(json.loads(re.sub("'", "\"", x))[0])
except:
return(x)
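# Illustrative behaviour (hypothetical values):
#   f("{'name': 'Some Collection', 'family': 'Some Family'}", 'name') -> 'Some Collection'
#   f('Some Collection', 'name') -> 'Some Collection'   (not valid JSON, so the raw string falls through)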
# parse the collection name and family out of the (stringified) collection json
uri_df['parsed_collection'] = uri_df.collection.apply(lambda x: f(x, 'name') )
uri_df['parsed_family'] = uri_df.collection.apply(lambda x: f(x, 'family') )
uri_df['clean_name'] = uri_df.name.apply( lambda x: f_cn(x) )
# calculate what the collection name is
# blank parsed_collection values are masked to NaN so they also fall back to clean_name (replace('', None) would forward-fill instead of blanking)
uri_df['use_collection'] = uri_df.parsed_collection.mask(uri_df.parsed_collection == '').fillna( uri_df.clean_name )#.fillna( uri_df.row_symbol )
# uri_df[uri_df.use_collection == 'nan'][['use_collection','parsed_collection','parsed_family','clean_name','name','collection','symbol','row_symbol','row_collection']].head()
# uri_df[uri_df.use_collection == 'nan'][['use_collection','parsed_collection','parsed_family','clean_name','name','collection','symbol','row_symbol','row_collection']].to_csv('~/Downloads/tmp.csv', index=False)
len(uri_df)
# clean the collection name
def f1(x):
try:
if len(x['use_collection']) == 1:
return(x['clean_name'])
if bool(re.match('.+\s+#[0-9]+', x['use_collection'])):
return(''.join(re.split('#', x['use_collection'])[:-1]).strip())
if '{' in x['use_collection']:
return(x['clean_name'])
return(x['use_collection'].strip().title())
except:
return(x['use_collection'].strip().title())
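# Illustrative behaviour (hypothetical rows):
#   use_collection='some apes'       -> 'Some Apes'   (title-cased)
#   use_collection='Some Apes #42'   -> 'Some Apes'   (trailing '#N' stripped)
#   use_collection contains '{'      -> falls back to the row's clean_name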
# uri_df['tmp'] = uri_df.apply(lambda x: f1(x), 1 )  # redundant: identical to the use_collection assignment below
uri_df['use_collection'] = uri_df.apply(lambda x: f1(x), 1 )
# clean the mint_address
uri_df['mint_address'] = uri_df.mint_address.apply(lambda x: re.sub(r'\.json$', '', x))
uri_df = uri_df.fillna('None')
for i in range(2):
# for each collection-name-symbol combo, see how many have multiple mappings
a = uri_df.copy().fillna('None')
a = a[['row_collection','row_clean_name','row_symbol','use_collection']].drop_duplicates().groupby(['row_collection','row_clean_name','row_symbol']).use_collection.count().reset_index().rename(columns={'use_collection':'n_1'})
uri_df = merge(uri_df, a, ensure=True)
# for each collection-symbol combo, see how many have multiple mappings
a = uri_df.copy().fillna('None')
a = a[['row_collection','row_symbol','use_collection']].drop_duplicates().groupby(['row_collection','row_symbol']).use_collection.count().reset_index().rename(columns={'use_collection':'n_2'})
uri_df = merge(uri_df, a, ensure=True)
# for each collection combo, see how many have multiple mappings
a = uri_df.copy().fillna('None')
a = a[['row_collection','use_collection']].drop_duplicates().groupby(['row_collection']).use_collection.count().reset_index().rename(columns={'use_collection':'n_3'})
uri_df = merge(uri_df, a, ensure=True)
uri_df['n'] = uri_df.apply(lambda x: x['n_3'] if x['row_collection'] in collection_only else x['n_2'] if x['row_collection'] in symbol_only else x['n_1'], 1 )
print('{} / {} ({}%) have multiple collection-name-symbol mappings'.format(len(uri_df[uri_df.n > 1]), len(uri_df), round( 100.0 * len(uri_df[uri_df.n > 1]) / len(uri_df))))
# if there are multiple mappings, use the parsed_family instead of use_collection
uri_df['use_collection'] = uri_df.apply(lambda x: x['use_collection'] if x['n'] == 1 else x['parsed_family'], 1 )
del uri_df['n_1']
del uri_df['n_2']
del uri_df['n_3']
# only take rows where there is a single mapping
m = uri_df[uri_df.n==1][[ 'use_collection','row_collection','row_clean_name','row_symbol' ]].dropna().drop_duplicates()
m.columns = [ 'use_collection','collection','clean_name','symbol' ]
m_1 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].fillna('').merge(m.fillna(''), how='left')
m_2 = new_mints[ ((new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ][[ 'collection','mint_address','symbol' ]].fillna('').merge(m.fillna(''), how='left')
m_3 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & ((new_mints.collection.isin(collection_only))) ][[ 'collection','mint_address' ]].fillna('').merge(m.fillna(''), how='left')
len(m_1) + len(m_2) + len(m_3)
len(new_mints)
# m = new_mints.fillna('').merge(m.fillna(''), how='left')
m = pd.concat( [m_1, m_2, m_3] )
print('After all this, we have {}% of the mints'.format( round(len(m) * 100 / len(new_mints)) ))
len(new_mints)
len(m)
m['mint_address'] = m.mint_address.apply(lambda x: re.sub(r'\.json$', '', x) )
m = m[['mint_address','use_collection']].dropna().drop_duplicates()
m.columns = ['mint_address','collection']
m[m.collection.isnull()].head()
m[m.collection=='Nan'].head()
m = m[m.collection != 'Nan']
tmp = m.groupby('collection').mint_address.count().reset_index().sort_values('mint_address', ascending=0)
tmp.head()
# m.to_csv('./data/mult_update_auth_labels.csv', index=False)
write_csv(DATA_FOLDER, 'mult_update_auth_labels', m)
def compile():
single_update_auth_labels = read_csv(DATA_FOLDER, 'single_update_auth_labels')
mult_update_auth_labels = read_csv(DATA_FOLDER, 'mult_update_auth_labels')
howrare_labels = read_csv(DATA_FOLDER, 'howrare_labels')
df = pd.concat([howrare_labels, single_update_auth_labels, mult_update_auth_labels])
df = df[ (df.collection != 'Nan') & (df.collection != 'nan') & (df.collection.notnull()) ]
df = df[ (df.mint_address != 'Nan') & (df.mint_address != 'nan') & (df.mint_address.notnull()) ]
df = df.drop_duplicates(subset=['mint_address'], keep='first')
write_csv(DATA_FOLDER, 'solana_nft_labels', df[['mint_address','collection']])
# print('Loaded!')
# mints_from_me()
# pull_from_metaboss()
# compile()
# how_rare_is_api()

View File

@ -16,9 +16,12 @@ clean_names = {
,'boredapeyachtclub': 'BAYC'
,'mutantapeyachtclub': 'MAYC'
,'bayc': 'BAYC'
,'bakc': 'BAKC'
,'mayc': 'MAYC'
,'solgods': 'SOLGods'
,'meerkatmillionairescc': 'Meerkat Millionaires'
,'ggsg:galacticgeckos': 'Galactic Geckos'
,'solstein': 'SolStein'
# ,'stonedapecrew': 'Stoned Ape Crew'
}
@ -47,6 +50,7 @@ def clean_name(name):
name = re.sub('-', ' ', name)
name = re.sub(' On ', ' on ', name)
name = re.sub('Defi ', 'DeFi ', name)
# name = re.sub(r'[^a-zA-Z0-9\s]', '', name)
return(name)
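# Based only on the substitutions visible in this hunk (earlier steps not shown), e.g.
#   clean_name('some-collection On Sol') -> 'some collection on Sol'
#   clean_name('Defi Degens')            -> 'DeFi Degens'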