Mirror of https://github.com/FlipsideCrypto/nft-deal-score.git (synced 2026-02-06 10:56:58 +00:00)

Merge pull request #43 from FlipsideCrypto/kellen-updates-2022-02-03
Kellen updates 2022 02 03

Commit af33e78dd3
@ -9,6 +9,7 @@ sudo cp ~/nft-deal-score/viz/nft_deal_score_data.RData /srv/shiny-server/nft-dea
sudo cp ~/nft_deal_score_listings_data.RData /rstudio-data
sudo cp ~/nft_deal_score_sales_data.RData /rstudio-data
sudo cp ~/nft_deal_score_sales.csv /rstudio-data
sudo cp ~/nft_deal_score_data.RData /rstudio-data
sudo cp ~/nft_deal_score_listings.csv /rstudio-data
sudo cp ~/nft_deal_score_sales.csv /rstudio-data
@ -27,7 +27,7 @@ attributes[, feature_name := trimws(feature_name) ]
|
||||
attributes[, feature_value := trimws(as.character(feature_value)) ]
|
||||
feature_values <- read_csv('feature_values.csv')
|
||||
sales <- read_csv('model_sales.csv')
|
||||
listings <- read_csv('listings.csv')
|
||||
listings <- read.csv('/Users/kellenblumberg/git/nft-deal-score/viz/nft_deal_score_listings.csv') %>% as.data.table()
|
||||
coefsdf <- read_csv('coefsdf.csv')
|
||||
tokens <- read_csv('tokens.csv')
|
||||
tokens[, token_id := clean_token_id]
|
||||
@ -39,18 +39,25 @@ listings <- listings[ !(collection == 'Solana Monkey Business' & token_id == 953
|
||||
tokens[, token_id := as.numeric(token_id)]
|
||||
|
||||
# manual adjustments to price
|
||||
ids_1 <- attributes[ (collection == 'Aurory') & (feature_value == 'Solana Blob') ]$token_id
|
||||
pred_price[ collection == 'Aurory' & token_id %in% eval(ids_1), pred_price := (pred_price * 0.8) ]
|
||||
# ids_1 <- attributes[ (collection == 'Aurory') & (feature_value == 'Solana Blob') ]$token_id
|
||||
# pred_price[ collection == 'Aurory' & token_id %in% eval(ids_1), pred_price := (pred_price * 0.8) ]
|
||||
|
||||
ids_2 <- attributes[ (collection == 'Aurory') & (feature_value == 'Long Blob Hair ') ]$token_id
|
||||
pred_price[ collection == 'Aurory' & token_id %in% eval(ids_2), pred_price := (pred_price * 0.90) ]
|
||||
# ids_2 <- attributes[ (collection == 'Aurory') & (feature_value == 'Long Blob Hair ') ]$token_id
|
||||
# pred_price[ collection == 'Aurory' & token_id %in% eval(ids_2), pred_price := (pred_price * 0.90) ]
|
||||
|
||||
ids_3 <- attributes[ (collection == 'Aurory') & (grepl( 'Mask', feature_value, fixed = TRUE)) ]$token_id
|
||||
pred_price[ collection == 'Aurory' & token_id %in% eval(ids_3), pred_price := (pred_price * 0.975) ]
|
||||
# ids_3 <- attributes[ (collection == 'Aurory') & (grepl( 'Mask', feature_value, fixed = TRUE)) ]$token_id
|
||||
# pred_price[ collection == 'Aurory' & token_id %in% eval(ids_3), pred_price := (pred_price * 0.975) ]
|
||||
|
||||
sales[collection == 'Cets On Creck', collection := 'Cets on Creck']
|
||||
pred_price[collection == 'Cets On Creck', collection := 'Cets on Creck']
|
||||
# sales[collection == 'Cets On Creck', collection := 'Cets on Creck']
|
||||
# pred_price[collection == 'Cets On Creck', collection := 'Cets on Creck']
|
||||
listings[collection == 'Cets On Creck', collection := 'Cets on Creck']
|
||||
cols <- c( 'Citizens By Solsteads' )
|
||||
# sales[, tmp := tolower(coll)]
|
||||
for (col in cols) {
|
||||
sales[ tolower(collection) == eval(tolower(col)), collection := eval(col) ]
|
||||
pred_price[ tolower(collection) == eval(tolower(col)), collection := eval(col) ]
|
||||
listings[ tolower(collection) == eval(tolower(col)), collection := eval(col) ]
|
||||
}
|
||||
|
||||
|
||||
sort(unique(listings$collection))
|
||||
@ -58,10 +65,13 @@ sort(unique(pred_price$collection))
|
||||
sort(unique(sales$collection))
|
||||
|
||||
# filter for only collections that have all data
|
||||
a <- unique(pred_price[, list(collection)])
|
||||
b <- unique(sales[, list(collection)])
|
||||
c <- unique(listings[, list(collection)])
|
||||
a <- unique(pred_price[, list(collection)][order(collection)])
|
||||
b <- unique(sales[, list(collection)][order(collection)])
|
||||
c <- unique(listings[, list(collection)][order(collection)])
|
||||
d <- merge(merge(a, b), c)
|
||||
d <- d[order(collection)]
|
||||
d <- d[ collection %in% c('Aurory','Bubblegoose Ballers','Catalina Whale Mixer','Cets on Creck','DeGods','Degen Apes','Famous Fox Federation','Meerkat Millionaires','Okay Bears','Pesky Penguins','Primates','SOLGods','Solana Monkey Business','Stoned Ape Crew','ThugbirdzcMAYC') ]
|
||||
write.csv(d, '~/Downloads/tmp.csv', row.names=F)
|
||||
|
||||
pred_price <- merge(pred_price, d, by=c('collection'))
|
||||
attributes <- merge(attributes, d, by=c('collection'))
|
||||
@ -91,6 +101,7 @@ save(
|
||||
, tokens
|
||||
, file = paste0(file.location,'nft_deal_score_data.Rdata')
|
||||
)
|
||||
|
||||
# save(
|
||||
# listings
|
||||
# , file = paste0(file.location,'nft_deal_score_listings_data.Rdata')
format_data.py (231 changed lines)
@ -4,6 +4,7 @@ import os
|
||||
import math
|
||||
import json
|
||||
from typing import Collection
|
||||
from nbformat import write
|
||||
import pandas as pd
|
||||
import snowflake.connector
|
||||
|
||||
@ -364,15 +365,37 @@ def solana():
|
||||
|
||||
|
||||
|
||||
query = '''
|
||||
SELECT DISTINCT project_name
|
||||
FROM solana.dim_nft_metadata
|
||||
'''
|
||||
seen = ctx.cursor().execute(query)
|
||||
seen = pd.DataFrame.from_records(iter(seen), columns=[x[0] for x in seen.description])
|
||||
seen = clean_colnames(seen)
|
||||
seen = list(seen.project_name.values)
|
||||
seen = [ x.lower() for x in seen ]
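This cursor-to-DataFrame idiom (execute, wrap iter(cursor) in pd.DataFrame.from_records, lowercase the columns) recurs throughout these scripts. A minimal helper capturing it in one place (a sketch; the name query_to_df is hypothetical and not part of the repo):

def query_to_df(ctx, query):
    # Execute on a fresh cursor, build a DataFrame from the row tuples, and
    # take column names from cursor.description (same steps as above).
    cur = ctx.cursor().execute(query)
    df = pd.DataFrame.from_records(iter(cur), columns=[c[0] for c in cur.description])
    df.columns = [c.lower() for c in df.columns]  # equivalent to clean_colnames(df)
    return df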
|
||||
|
||||
metadata = pd.read_csv('./data/metadata.csv')
|
||||
len(metadata)
|
||||
# print(sorted(metadata.collection.unique()))
|
||||
# metadata = metadata[metadata.collection == collection]
|
||||
# print(sorted(metadata.collection.unique()))
|
||||
metadata = metadata[-(metadata.feature_name.isin(['adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2','nft_rank']))]
|
||||
metadata[['collection']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
|
||||
len(metadata.token_id.unique())
|
||||
id_map = pd.read_csv('./data/mint_to_token_id_map.csv')
|
||||
# id_map = pd.read_csv('./data/mint_to_token_id_map.csv')
|
||||
id_map = pd.read_csv('./data/tokens.csv')
|
||||
cs = ['Stoned Ape Crew']
|
||||
tokens = pd.read_csv('./data/tokens.csv')
|
||||
tokens.collection.unique()
|
||||
len(tokens.collection.unique())
|
||||
cs = [ x for x in id_map.collection.unique() if not x.lower() in seen ]
|
||||
len(id_map.collection.unique())
|
||||
len(cs)
|
||||
id_map = id_map[id_map.collection.isin(cs)]
|
||||
metadata = metadata[metadata.collection.isin(cs)]
|
||||
|
||||
# cs = metadata[metadata.chain.fillna('Solana') == 'Solana'].collection.unique()
|
||||
cs = metadata.collection.unique()
|
||||
id_map = id_map[id_map.collection.isin(cs)]
|
||||
metadata = metadata[metadata.collection.isin(cs)]
|
||||
sorted(id_map.collection.unique())
|
||||
@ -399,51 +422,180 @@ def solana():
|
||||
# sorted(metadata.feature_name.unique())
|
||||
|
||||
# metadata[['collection']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
|
||||
# Python code to convert into dictionary
|
||||
def Convert(tup, di):
|
||||
di = dict(tup)
|
||||
return di
|
||||
|
||||
metadata = metadata[-metadata.collection.isin(['LunaBulls', 'Levana Dragon Eggs'])]
|
||||
metadata['token_id'] = metadata.token_id.astype(float)
|
||||
metadata['token_id'] = metadata.token_id.astype(int)
|
||||
metadata.groupby(['collection','feature_name']).token_id.count()
|
||||
metadata.head()
|
||||
metadata[metadata.mint_address.isnull()].collection.unique()
|
||||
assert(len(metadata[metadata.mint_address.isnull()]) == 0)
|
||||
for collection in metadata.collection.unique():
|
||||
print(collection)
|
||||
dirs = sorted(list(set(os.listdir('./data/metadata/')).intersection(set(metadata.collection.unique()))))
|
||||
sorted(list(metadata.collection.unique()))
|
||||
# collection = 'Bubblegoose Ballers'
|
||||
it = 0
|
||||
tot = len(metadata.collection.unique())
|
||||
data = []
|
||||
for collection in metadata.collection.unique()[:1]:
|
||||
print('#{} / {}: {}'.format(it, tot, collection))
|
||||
mdf = metadata[metadata.collection == collection]
|
||||
results = []
|
||||
for token_id in sorted(mdf.token_id.unique()):
|
||||
if token_id % 1000 == 1:
|
||||
print(token_id, len(results))
|
||||
cur = mdf[mdf.token_id == token_id]
|
||||
token_metadata = {}
|
||||
# m = mints[(mints.collection == collection) & (mints.token_id == token_id) ]
|
||||
m = metadata[(metadata.collection == collection) & (metadata.token_id == token_id) ]
|
||||
m = m.fillna('None')
|
||||
if not len(m):
|
||||
print(token_id)
|
||||
continue
|
||||
# mint_address = m.mint_address.values[0] if 'mint_address' in m.columns else ''
|
||||
mint_address = m.mint_address.values[0]
|
||||
for row in cur.iterrows():
|
||||
row = row[1]
|
||||
token_metadata[row['feature_name']] = row['feature_value']
|
||||
df.groupby('Column1')[['Column2', 'Column3']].apply(lambda g: g.values.tolist()).to_dict()
|
||||
mdf.head(20).groupby(['collection','image_url','token_id'])[[ 'feature_name','feature_value' ]].apply(lambda g: g.values.tolist()).to_dict()
|
||||
|
||||
d = {
|
||||
'commission_rate': None
|
||||
, 'mint_address': mint_address
|
||||
, 'token_id': token_id
|
||||
, 'contract_address': mint_address
|
||||
, 'contract_name': row['collection']
|
||||
, 'created_at_block_id': 0
|
||||
, 'created_at_timestamp': str('2021-01-01')
|
||||
, 'created_at_tx_id': ''
|
||||
, 'creator_address': mint_address
|
||||
, 'creator_name': row['collection']
|
||||
, 'image_url': 'None'
|
||||
, 'project_name': row['collection']
|
||||
, 'token_id': int(token_id)
|
||||
, 'token_metadata': token_metadata
|
||||
, 'token_metadata_uri': row['image_url']
|
||||
, 'token_name': row['collection']
|
||||
mdf.head(20).groupby(['collection','image_url','token_id'])[[ 'feature_name','feature_value' ]].apply(lambda g: list(map(tuple, g.values.tolist())) ).to_dict()
|
||||
|
||||
mdf.head(20).groupby(['collection','image_url','token_id'])[[ 'feature_name','feature_value' ]].apply(lambda g: Convert(list(map(tuple, g.values.tolist())), {}) ).to_dict()
|
||||
a = mdf.head(20).groupby(['collection','mint_address','token_id','image_url'])[[ 'feature_name','feature_value' ]].apply(lambda g: Convert(list(map(tuple, g.values.tolist())), {}) ).reset_index()
|
||||
|
||||
|
||||
a = metadata.groupby(['collection','mint_address','token_id','image_url'])[[ 'feature_name','feature_value' ]].apply(lambda g: Convert(list(map(tuple, g.values.tolist())), {}) ).reset_index()
|
||||
a.columns = ['collection','mint_address','token_id','image_url', 'token_metadata']
|
||||
a['commission_rate'] = None
|
||||
a['contract_address'] = a.mint_address
|
||||
a['contract_name'] = a.collection
|
||||
a['created_at_block_id'] = 0
|
||||
a['created_at_timestamp'] = '2021-01-01'
|
||||
a['created_at_tx_id'] = ''
|
||||
a['creator_address'] = a.mint_address
|
||||
a['creator_name'] = a.collection
|
||||
a['project_name'] = a.collection
|
||||
a['token_metadata_uri'] = a.image_url
|
||||
a['token_name'] = a.collection
|
||||
a.to_csv('./data/metadata/results.csv', index=False)
|
||||
a['n'] = range(len(a))
|
||||
a['n'] = a.n.apply(lambda x: int(x/50) )
|
||||
a['token_id'] = a.token_id.astype(int)
|
||||
cols = ['collection', 'mint_address', 'token_id', 'image_url', 'token_metadata',
|
||||
'commission_rate', 'contract_address', 'contract_name',
|
||||
'created_at_block_id', 'created_at_timestamp', 'created_at_tx_id',
|
||||
'creator_address', 'creator_name', 'project_name', 'token_metadata_uri',
|
||||
'token_name']
|
||||
|
||||
n = 100000
|
||||
tot = int(len(a) / n) + 1
|
||||
for i in range(0, len(a), n):
|
||||
ind = int(i/n)
|
||||
print('#{} / {}'.format(ind, tot))
|
||||
g = a.head(i+n).tail(n).to_dict('records')
|
||||
txt = [
|
||||
{
|
||||
"model": {
|
||||
"blockchain": "solana",
|
||||
"sinks": [
|
||||
{
|
||||
"destination": "{database_name}.silver.nft_metadata",
|
||||
"type": "snowflake",
|
||||
"unique_key": "blockchain || contract_address || token_id"
|
||||
}
|
||||
],
|
||||
},
|
||||
"results": g[x:x+50]
|
||||
}
|
||||
for x in range(0, len(g), 50)
|
||||
]
|
||||
w = pd.DataFrame({'ind': range(len(txt)), 'results':[json.dumps(x) for x in txt] })
|
||||
# w['results'] = w.results.apply(lambda x: x[1:-1] )
|
||||
w.to_csv('./data/metadata/results/{}.csv'.format(ind), index=False)
|
||||
# with open('./data/metadata/results/{}.json'.format(i), 'w') as outfile:
|
||||
# json.dump(results[i:i+100000], outfile)
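The slicing in the comprehension above (g[x:x+50] with x stepping by 50) is plain fixed-size chunking; a generic sketch of the same idea, with a hypothetical helper name:

def chunks(records, size=50):
    # Yield successive size-length slices; the last chunk may be shorter.
    for i in range(0, len(records), size):
        yield records[i:i + size]

With the sizes used above, each 100,000-row slice written to a CSV holds at most 100000 / 50 = 2,000 of these 50-record payloads.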
|
||||
|
||||
g = a.head(200).groupby('n')[cols].apply(lambda g: Convert(list(map(tuple, g.values.tolist())), {}) ).to_dict()
|
||||
g = a.head(200).groupby('n')[cols].apply(lambda g: (list(map(tuple, g.values.tolist())), {}) )
|
||||
g = a.head(200).groupby('n')[cols].apply(lambda g: g.values.tolist()).reset_index()
|
||||
g = a.head(200).to_dict('records')
|
||||
sorted(a.collection.unique())
|
||||
g = a[a.collection == 'Jungle Cats'].head(20000).to_dict('records')
|
||||
txt = [
|
||||
{
|
||||
"model": {
|
||||
"blockchain": "solana",
|
||||
"sinks": [
|
||||
{
|
||||
"destination": "{database_name}.silver.nft_metadata",
|
||||
"type": "snowflake",
|
||||
"unique_key": "blockchain || contract_address || token_id"
|
||||
}
|
||||
],
|
||||
},
|
||||
"results": g[i:i+50]
|
||||
}
|
||||
results.append(d)
|
||||
for i in range(0, len(g), 50)
|
||||
]
|
||||
w = pd.DataFrame({'ind': range(len(txt)), 'results':[json.dumps(x) for x in txt] })
|
||||
# w['results'] = w.results.apply(lambda x: x[1:-1] )
|
||||
w.to_csv('./data/metadata/results.csv', index=False)
|
||||
with open('./data/metadata/results.txt', 'w') as outfile:
|
||||
outfile.write(json.dumps(txt))
|
||||
g = list(a.head(200).values)
|
||||
results = a.to_dict('records')
|
||||
for i in range(0, len(results), 100000):
|
||||
print(i)
|
||||
with open('./data/metadata/results/{}.json'.format(i), 'w') as outfile:
|
||||
json.dump(results[i:i+100000], outfile)
|
||||
|
||||
n = 50
|
||||
r = math.ceil(len(results) / n)
|
||||
for i in range(r):
|
||||
print('#{} / {}'.format(i, r))
|
||||
newd = {
|
||||
"model": {
|
||||
"blockchain": "solana",
|
||||
"sinks": [
|
||||
{
|
||||
"destination": "{database_name}.silver.nft_metadata",
|
||||
"type": "snowflake",
|
||||
"unique_key": "blockchain || contract_address || token_id"
|
||||
}
|
||||
],
|
||||
},
|
||||
"results": results[(i * n):((i * n)+r)]
|
||||
}
|
||||
data += [ json.dumps(newd) ]
|
||||
with open('./data/metadata/results/{}.txt'.format(collection, i), 'w') as outfile:
|
||||
outfile.write(json.dumps(newd))
|
||||
|
||||
|
||||
# results = []
|
||||
# for token_id in sorted(mdf.token_id.unique()):
|
||||
# if token_id % 1000 == 1:
|
||||
# print(token_id, len(results))
|
||||
# cur = mdf[mdf.token_id == token_id]
|
||||
# token_metadata = {}
|
||||
# # m = mints[(mints.collection == collection) & (mints.token_id == token_id) ]
|
||||
# m = metadata[(metadata.collection == collection) & (metadata.token_id == token_id) ]
|
||||
# m = m.fillna('None')
|
||||
# if not len(m):
|
||||
# print(token_id)
|
||||
# continue
|
||||
# # mint_address = m.mint_address.values[0] if 'mint_address' in m.columns else ''
|
||||
# mint_address = m.mint_address.values[0]
|
||||
# for row in cur.iterrows():
|
||||
# row = row[1]
|
||||
# token_metadata[row['feature_name']] = row['feature_value']
|
||||
|
||||
# d = {
|
||||
# 'commission_rate': None
|
||||
# , 'mint_address': mint_address
|
||||
# , 'token_id': token_id
|
||||
# , 'contract_address': mint_address
|
||||
# , 'contract_name': row['collection']
|
||||
# , 'created_at_block_id': 0
|
||||
# , 'created_at_timestamp': str('2021-01-01')
|
||||
# , 'created_at_tx_id': ''
|
||||
# , 'creator_address': mint_address
|
||||
# , 'creator_name': row['collection']
|
||||
# , 'image_url': row['image_url']
|
||||
# , 'project_name': row['collection']
|
||||
# , 'token_id': int(token_id)
|
||||
# , 'token_metadata': token_metadata
|
||||
# , 'token_metadata_uri': row['image_url']
|
||||
# , 'token_name': row['collection']
|
||||
# }
|
||||
# results.append(d)
|
||||
print('Uploading {} results'.format(len(results)))
|
||||
|
||||
dir = './data/metadata/{}/'.format(collection)
|
||||
@ -466,6 +618,7 @@ def solana():
|
||||
},
|
||||
"results": results[(i * n):((i * n)+r)]
|
||||
}
|
||||
data += [ json.dumps(newd) ]
|
||||
with open('./data/metadata/{}/{}.txt'.format(collection, i), 'w') as outfile:
|
||||
outfile.write(json.dumps(newd))
|
||||
load_data.py (812 changed lines)
@ -1,9 +1,8 @@
|
||||
import collections
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import math
|
||||
from tkinter import SEL
|
||||
import requests
|
||||
import pandas as pd
|
||||
import urllib.request
|
||||
@ -11,11 +10,24 @@ import snowflake.connector
|
||||
from bs4 import BeautifulSoup
|
||||
from time import sleep
|
||||
|
||||
import cloudscraper
|
||||
|
||||
from theblockchainapi import SolanaAPIResource, SolanaNetwork, SearchMethod
|
||||
|
||||
# Get an API key pair for free here: https://dashboard.blockchainapi.com/api-keys
|
||||
MY_API_KEY_ID = 'sLbjx8YFYdTtUuH'
|
||||
MY_API_SECRET_KEY = 'p24pFaM9lLbWscN'
|
||||
BLOCKCHAIN_API_RESOURCE = SolanaAPIResource(
|
||||
api_key_id=MY_API_KEY_ID,
|
||||
api_secret_key=MY_API_SECRET_KEY
|
||||
)
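The key pair above is committed in plain text; a sketch that reads it from the environment instead (the variable names BLOCKCHAIN_API_KEY_ID and BLOCKCHAIN_API_SECRET_KEY are hypothetical), mirroring how prepare_data.py reads SNOWFLAKE_USR and SNOWFLAKE_PWD:

# Hypothetical environment-variable names; export them before running.
MY_API_KEY_ID = os.getenv('BLOCKCHAIN_API_KEY_ID')
MY_API_SECRET_KEY = os.getenv('BLOCKCHAIN_API_SECRET_KEY')
BLOCKCHAIN_API_RESOURCE = SolanaAPIResource(
    api_key_id=MY_API_KEY_ID,
    api_secret_key=MY_API_SECRET_KEY
)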
|
||||
|
||||
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
|
||||
|
||||
from solana_model import just_float
|
||||
from utils import clean_name, clean_token_id, format_num
|
||||
from utils import clean_name, clean_token_id, format_num, merge
|
||||
|
||||
|
||||
|
||||
#########################
|
||||
# Connect to DB #
|
||||
@ -100,6 +112,19 @@ def add_collection_steps():
|
||||
# 5. run model
|
||||
pass
|
||||
|
||||
def create_upload_file():
|
||||
cols = [ 'collection','mint_address' ]
|
||||
a = pd.read_csv('./data/mints-2022-06-13-2pm.csv')[cols]
|
||||
b = pd.read_csv('~/Downloads/manual_labels.csv')
|
||||
b.columns = cols
|
||||
c = pd.read_csv('~/Downloads/solscan_collections.csv')[cols]
|
||||
d = pd.read_csv('./data/tokens.csv')[cols]
|
||||
df = pd.concat([a, b, c, d]).drop_duplicates(subset=['mint_address'], keep='last')
|
||||
df.to_csv('~/Downloads/mints-2022-06-13-5pm.csv', index=False)
|
||||
tmp = pd.read_csv('~/Downloads/mints-2022-06-13-5pm.csv')
|
||||
tmp[tmp.mint_address == 'EhuVN896QVypRreAt6mcJr6eKkKunVzsgSRz7qt4oeBr']
|
||||
|
||||
|
||||
def manual_clean():
|
||||
for c in [ 'pred_price', 'attributes', 'feature_values', 'model_sales', 'listings', 'coefsdf', 'tokens' ]:
|
||||
df = pd.read_csv('./data/{}.csv'.format(c))
|
||||
@ -108,7 +133,149 @@ def manual_clean():
|
||||
df['clean_token_id'] = df.token_id
|
||||
df.to_csv('./data/{}.csv'.format(c), index=False)
|
||||
|
||||
|
||||
def pull_from_solscan():
|
||||
|
||||
todo = [
|
||||
['50a75e6d3d0b6d4a72b2f745fdba4b1c28bc774ca9629fe8e36053ae2fb396f8','Degen Egg']
|
||||
, ['45e3f45d695e9e8775eed480cb0f5a6a957d47dcb3ed3800e454846dca9ab7fc','Genopets']
|
||||
, ['a437071c6f9679e8431a072ae39421262bf289cc6ead21e38190d5b7b409e7f7','Shin Sengoku']
|
||||
, ['d38349f2704e8cd1c538cc48fbea4b3e2596ac8da14b62c0eb3c07aeda7ae75e','SolStein']
|
||||
, ['9e0593a4842ceb9ccdc510e6ffdf0d84f736bff2b58d5803c5002ace17df9fe0','Zillaz NFT']
|
||||
, ['895d8f01108fbb6b28c5e32027c9c98e3054241927c8e59c304fa4763c5c88ea','enviroPass Tier 02']
|
||||
, ['59c2a35d902f85feec4c774df503a0df2be263f763dcbcb73bce50c999fc2c78','The Fracture']
|
||||
, ['e8dfb059b1dfc71cf97342a1c46793bc5e154909416a93a155929da5bba44a57','Suteki']
|
||||
, ['271e0d68d069d80afbcb916e877831b060933b97e7b02e1cfb77e74b228b4745','Chillchat']
|
||||
]
|
||||
start = time.time()
|
||||
data = []
|
||||
meta = []
|
||||
it = 0
|
||||
tot = len(todo)
|
||||
for collectionId, collection in todo:
|
||||
it += 1
|
||||
print('#{} / {}'.format(it, tot))
|
||||
# collectionId = j['data']['collectionId']
|
||||
# collection = j['data']['collection']
|
||||
offset = 0
|
||||
limit = 500
|
||||
while True:
|
||||
print(offset)
|
||||
url = 'https://api.solscan.io/collection/nft?sortBy=nameDec&collectionId={}&offset={}&limit={}'.format(collectionId, offset, limit)
|
||||
r = requests.get(url)
|
||||
js = r.json()['data']
|
||||
offset += limit
|
||||
if len(js) == 0:
|
||||
break
|
||||
for j in js:
|
||||
data += [[ collectionId, collection, j['info']['mint'] ]]
|
||||
m = j['info']['meta']
|
||||
m['mint_address'] = j['info']['mint']
|
||||
# m['name'] = row['name']
|
||||
# m['update_authority'] = update_authority
|
||||
meta += [ m ]
|
||||
it += 1
|
||||
end = time.time()
|
||||
print('Finished {} / {} in {} minutes'.format(it, tot, round((end - start) / 60.0, 1)))
|
||||
df = pd.DataFrame(data, columns=['collection_id','collection','mint_address'])
|
||||
df.to_csv('~/Downloads/solscan_collections.csv', index=False)
|
||||
df[['collection','mint_address']].to_csv('~/Downloads/mints-2022-06-14-8am.csv', index=False)
|
||||
df.groupby('collection').mint_address.count()
|
||||
|
||||
def collections_from_missing_tokens():
|
||||
query = '''
|
||||
WITH base AS (
|
||||
SELECT block_timestamp::date AS date
|
||||
, s.*
|
||||
, ROW_NUMBER() OVER (ORDER BY sales_amount DESC) AS rn
|
||||
FROM solana.fact_nft_sales s
|
||||
LEFT JOIN solana.dim_labels l on s.mint = l.address
|
||||
WHERE marketplace in ('magic eden v1', 'magic eden v2')
|
||||
AND block_timestamp >= '2022-01-01'
|
||||
AND l.address IS NULL
|
||||
AND sales_amount >= 10
|
||||
)
|
||||
SELECT *
|
||||
FROM base
|
||||
WHERE rn % 20 = 0
|
||||
ORDER BY sales_amount DESC
|
||||
LIMIT 500
|
||||
'''
|
||||
missing = ctx.cursor().execute(query)
|
||||
missing = pd.DataFrame.from_records(iter(missing), columns=[x[0] for x in missing.description])
|
||||
missing = clean_colnames(missing)
|
||||
missing.head()
|
||||
|
||||
headers = {
|
||||
'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
|
||||
}
|
||||
it = 0
|
||||
tot = len(missing)
|
||||
data = []
|
||||
for m in missing.mint.unique():
|
||||
it += 1
|
||||
if it % 10 == 0:
|
||||
print('#{} / {} ({})'.format(it, tot, len(data)))
|
||||
url = 'https://api-mainnet.magiceden.dev/v2/tokens/{}'.format(m)
|
||||
r = requests.get(url, headers=headers)
|
||||
j = r.json()
|
||||
data.append(j)
|
||||
pass
|
||||
df = pd.DataFrame(data)
|
||||
df.head()[['collection','mintAddress']]
|
||||
df.to_csv('~/Downloads/tmp.csv', index=False)
|
||||
need = df.groupby(['collection','updateAuthority']).mintAddress.count().reset_index().sort_values('mintAddress', ascending=0)
|
||||
need = need[need.mintAddress > 1].rename(columns={'updateAuthority':'update_authority'})
|
||||
need.to_csv('~/Downloads/missing.csv', index=False)
|
||||
need.head()
|
||||
sorted(need.collection.unique())
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('_', ' ', x.title()).strip() )
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('\|', '-', x).strip() )
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('\)', '', x).strip() )
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('\(', '', x).strip() )
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('\'', '', x).strip() )
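The five .apply(re.sub(...)) passes above (a similar chain recurs twice later in this file) could be collapsed into one cleaner; a sketch under that assumption, with a hypothetical helper name:

def clean_collection_name(x):
    # Same substitutions as the chain above: underscores to spaces with title
    # case, '|' to '-', then drop parentheses and apostrophes.
    x = re.sub('_', ' ', x.title())
    x = re.sub(r'\|', '-', x)
    x = re.sub(r"[()']", '', x)
    return x.strip()
# applied as: need['collection'] = need.collection.apply(clean_collection_name)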
|
||||
|
||||
us = sorted(g[g.mintAddress > 1].updateAuthority.unique())
|
||||
tot = len(us)
|
||||
it = 0
|
||||
for u in us:
|
||||
it += 1
|
||||
print('#{} / {} ({})'.format(it, tot, len(data)))
|
||||
|
||||
nfts = BLOCKCHAIN_API_RESOURCE.search_nfts(
|
||||
update_authority = u
|
||||
, update_authority_search_method = SearchMethod.EXACT_MATCH
|
||||
)
|
||||
print(u, len(nfts))
|
||||
for n in nfts:
|
||||
m = n['nft_metadata']
|
||||
data += [[ m['update_authority'], m['mint'], m['data']['symbol'], m['data']['name'] ]]
|
||||
|
||||
def manual_tags():
|
||||
d = {
|
||||
'daaLrDfvcT4joui5axwR2gCkGAroruJFzyVsacU926g': 'Degenerate Ape Kindergarten'
|
||||
, 'FbfGrZ3LKuGSsayK57DetzzyN7qKeNnDuLMu5bBSocwF': 'Botheads'
|
||||
}
|
||||
a = 'FbfGrZ3LKuGSsayK57DetzzyN7qKeNnDuLMu5bBSocwF'
|
||||
c = 'Botheads'
|
||||
labels = pd.DataFrame()
|
||||
for a, c in d.items():
|
||||
query = '''
|
||||
SELECT DISTINCT instructions[1]:parsed:info:mint::string AS mint_address
|
||||
FROM solana.fact_transactions
|
||||
WHERE instructions[1]:parsed:info:mintAuthority = '{}'
|
||||
'''.format(a)
|
||||
df = ctx.cursor().execute(query)
|
||||
df = pd.DataFrame.from_records(iter(df), columns=[x[0] for x in df.description])
|
||||
df = clean_colnames(df)
|
||||
df['collection'] = c
|
||||
labels = labels.append(df)
|
||||
labels.to_csv('~/Downloads/manual_labels.csv', index=False)
|
||||
|
||||
def mints_from_me():
|
||||
##################################
|
||||
# Get All ME Collections #
|
||||
##################################
|
||||
headers = {
|
||||
'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
|
||||
}
|
||||
@ -144,6 +311,9 @@ def mints_from_me():
|
||||
# lp_df.to_csv('./data/me_lp_collections.csv', index=False)
|
||||
# lp_df = pd.read_csv('./data/me_lp_collections.csv')
|
||||
|
||||
###########################################
|
||||
# Get 1 Mint From Each Collection #
|
||||
###########################################
|
||||
it = 0
|
||||
l_data = []
|
||||
old_l_df = pd.read_csv('./data/me_mints.csv')
|
||||
@ -154,7 +324,7 @@ def mints_from_me():
|
||||
it += 1
|
||||
row = row[1]
|
||||
print('Listings on {}...'.format(row['symbol']))
|
||||
url = 'https://api-mainnet.magiceden.dev/v2/collections/{}/listings?offset=0&limit=1'.format(row['symbol'])
|
||||
url = 'https://api-mainnet.magiceden.dev/v2/collections/{}/activities?offset=0&limit=1'.format(row['symbol'])
|
||||
if row['symbol'] in seen:
|
||||
print('Seen')
|
||||
continue
|
||||
@ -218,9 +388,38 @@ def mints_from_me():
|
||||
# l_df = pd.DataFrame(l_data, columns=['symbol','name','mint_address'])
|
||||
# l_df.to_csv('./data/me_mints.csv', index=False)
|
||||
|
||||
# get missing collections
|
||||
query = '''
|
||||
WITH base AS (
|
||||
SELECT block_timestamp::date AS date
|
||||
, s.*
|
||||
, ROW_NUMBER() OVER (ORDER BY sales_amount DESC) AS rn
|
||||
FROM solana.fact_nft_sales s
|
||||
LEFT JOIN solana.dim_labels l on s.mint = l.address
|
||||
WHERE marketplace in ('magic eden v1', 'magic eden v2')
|
||||
AND block_timestamp >= '2022-01-01'
|
||||
AND block_timestamp <= '2022-05-20'
|
||||
AND l.address IS NULL
|
||||
AND sales_amount > 20
|
||||
)
|
||||
SELECT *
|
||||
FROM base
|
||||
WHERE rn % 50 = 1
|
||||
LIMIT 100
|
||||
'''
|
||||
missing = ctx.cursor().execute(query)
|
||||
missing = pd.DataFrame.from_records(iter(missing), columns=[x[0] for x in missing.description])
|
||||
missing = clean_colnames(missing)
|
||||
|
||||
######################################################
|
||||
# Get Update Authorities For All Collections #
|
||||
######################################################
|
||||
l_df = pd.read_csv('./data/me_mints.csv')
|
||||
len(l_df)
|
||||
l_df.head()
|
||||
m_old = pd.read_csv('./data/me_update_authorities.csv')
|
||||
m_data = list(m_old.values)
|
||||
m_old['seen'] = 1
|
||||
m_data = list(m_old[['symbol','name','update_authority']].values)
|
||||
seen = [ x[0] for x in m_data ]
|
||||
print('Seen {} m_data'.format(len(seen)))
|
||||
l_df = l_df[-l_df.symbol.isin(seen)]
|
||||
@ -258,19 +457,583 @@ def mints_from_me():
|
||||
m_df.to_csv('./data/me_update_authorities.csv', index=False)
|
||||
m_df = pd.DataFrame(m_data, columns=['symbol','name','update_authority'])
|
||||
m_df = m_df.drop_duplicates()
|
||||
print('Adding {} rows to me_mints'.format(len(m_df) - len(m_old)))
|
||||
print('Adding {} rows to me_update_authorities'.format(len(m_df) - len(m_old)))
|
||||
m_df.to_csv('./data/me_update_authorities.csv', index=False)
|
||||
m_df.tail(134).head(20)
|
||||
m_df = m_df.tail(134)
|
||||
|
||||
|
||||
query = '''
|
||||
SELECT DISTINCT project_name, LOWER(project_name) AS lower_name
|
||||
FROM crosschain.address_labels
|
||||
WHERE blockchain = 'solana'
|
||||
AND label_subtype = 'nf_token_contract'
|
||||
AND project_name IS NOT NULL
|
||||
'''
|
||||
|
||||
labels = ctx.cursor().execute(query)
|
||||
labels = pd.DataFrame.from_records(iter(labels), columns=[x[0] for x in labels.description])
|
||||
labels = clean_colnames(labels)
|
||||
labels.to_csv('~/Downloads/tmp-la.csv', index=False)
|
||||
|
||||
######################################################
|
||||
# Get Update Authorities For All Collections #
|
||||
######################################################
|
||||
m_df = pd.read_csv('./data/me_update_authorities.csv')
|
||||
def f(x):
|
||||
x = re.sub('\(|\)', '', x)
|
||||
x = re.sub(' ', '_', x)
|
||||
x = re.sub('\'', '', x)
|
||||
return(x)
|
||||
m_df['collection'] = m_df.name.apply(lambda x: f(x) )
|
||||
m_df['seen'] = (-m_df.name.isin(m_df.name.tail(134).values)).astype(int)
|
||||
m_df['lower_name'] = m_df.name.apply(lambda x: x.lower() )
|
||||
seen = list(labels.lower_name.unique())
|
||||
m_df['seen'] = m_df.lower_name.isin(seen).astype(int)
|
||||
n_auth = m_df.groupby('update_authority').name.count().reset_index().rename(columns={'name':'n_auth'})
|
||||
m_df = m_df.merge(n_auth)
|
||||
len(m_df[m_df.seen == 0])
|
||||
len(m_df[ (m_df.seen == 0) & (m_df.n_auth == 1)])
|
||||
len(m_df[ (m_df.seen == 0) & (m_df.n_auth > 1)])
|
||||
|
||||
x = 'asf (asf)'
|
||||
f(x)
|
||||
m_df.to_csv('~/Downloads/tmp-m_df.csv', index=False)
|
||||
len(m_df.name.unique())
|
||||
|
||||
need = list(m_df[m_df.seen == 0].update_authority.unique())
|
||||
need = list(m_df[ (m_df.seen == 0) & (m_df.n_auth == 1) ].update_authority.unique())
|
||||
len(need)
|
||||
# need = need + [
|
||||
# need = [
|
||||
# 'CDgbhX61QFADQAeeYKP5BQ7nnzDyMkkR3NEhYF2ETn1k' # taiyo
|
||||
# , 'DC2mkgwhy56w3viNtHDjJQmc7SGu2QX785bS4aexojwX' # DAA
|
||||
# , 'daaLrDfvcT4joui5axwR2gCkGAroruJFzyVsacU926g' # Degen Egg
|
||||
# , 'BL5U8CoFPewr9jFcKf3kE1BhdFS1J59cwGpeZrm7ZTeP' # Skullbot
|
||||
# , 'DRGNjvBvnXNiQz9dTppGk1tAsVxtJsvhEmojEfBU3ezf' # Boryoku
|
||||
# , '7hYkx2CNGRB8JE7X7GefX1ak1dqe7GxgYKbpfj9moE9D' # mindfolk
|
||||
# , 'CjwNEVQFKk8YzZLCvvw6sNrjxiQW8dYDSzhTph18T7g5' # jelly rascals
|
||||
# , 'EcxEqUj4RNgdGJwPE3ktsM99Ea9ThPmXHUV5g37Qm4ju' # women monkey
|
||||
# , 'EQSoRhbN9fEEYXKEE5Lg63Mqf17P3JydcWTvDhdMJW1N' # hydrascripts
|
||||
# , '75CPiM9ywLgxhii9SQsNoA1SH3h66o5EhrYsazHR5Tqk' # hydrascripts
|
||||
# , 'aury7LJUae7a92PBo35vVbP61GX8VbyxFKausvUtBrt' # aurory
|
||||
# , 'ET3LWbEL6q4aUSjsX5xLyWktCwqKh6qsQE5j6TDZtZBY' # enviropass
|
||||
# , '8ERR2gYrvXcJFuoNAbPRvHXtrJnAXXHgXKkVviwz9R6C' # enviroPass
|
||||
# , 'GRDCbZBP1x2JxYf3rQQoPFGzF57LDPy7XtB1gEMaCqGV' # Space Robots
|
||||
# , 'GenoS3ck8xbDvYEZ8RxMG3Ln2qcyoAN8CTeZuaWgAoEA' # Genopet
|
||||
# , 'STEPNq2UGeGSzCyGVr2nMQAzf8xuejwqebd84wcksCK' # stepn
|
||||
# , 'HcS8iaEHwUino8wKzcgC16hxHodnPCyacVYUdBaSZULP' # BASC
|
||||
# , 'AvkbtawpmMSy571f71WsWEn41ATHg5iHw27LoYJdk8QA' # THUG
|
||||
# , 'GH4QhJznKEHHv44AqEH5SUohkUauWyAFtu5u8zUWUKL4' # StepN Shoebox
|
||||
# , 'FTQmhcD7SNBWrVxTgQMFr7xL2aA6adfAJJPBxGKU4VsZ' # Solstien
|
||||
# ]
|
||||
need = m_df[m_df.update_authority.isin(need)]
|
||||
|
||||
# m_df[m_df.lower_name.isin(seen)]
|
||||
# m_df[-m_df.lower_name.isin(seen)]
|
||||
# tmp = m_df[['update_authority','collection']].drop_duplicates().groupby(['update_authority']).collection.count().reset_index().rename(columns={'collection':'n_collection'})
|
||||
# tmp = tmp.sort_values('n_collection', ascending=0)
|
||||
# m_df = m_df.merge(tmp)
|
||||
# m_df = m_df.sort_values(by=['n_collection','update_authority','collection'], ascending=[0,0,0])
|
||||
l_df = pd.read_csv('./data/me_mints.csv')
|
||||
fix = need.merge(l_df[[ 'name','mint_address' ]])
|
||||
# len(need.name.unique())
|
||||
# len(fix.name.unique())
|
||||
# fix = fix.sort_values(by=['update_authority','collection'], ascending=[0,0])
|
||||
# fix.head()
|
||||
|
||||
|
||||
# seen = []
|
||||
# data = []
|
||||
# meta = []
|
||||
|
||||
# fix = fix[-(fix.name.isin(seen))]
|
||||
# start = time.time()
|
||||
# it = 0
|
||||
# tot = len(fix)
|
||||
# scraper = cloudscraper.create_scraper()
|
||||
# # for each collection
|
||||
# for row in fix.iterrows():
|
||||
# row = row[1]
|
||||
# print(row['name'])
|
||||
# if row['name'] in seen:
|
||||
# print('Seen')
|
||||
# continue
|
||||
# url = 'https://api.solscan.io/nft/detail?mint={}'.format(row['mint_address'])
|
||||
# t = scraper.get(url).text
|
||||
# j = json.loads(t)
|
||||
# # r = requests.get(url)
|
||||
# # j = r.json()
|
||||
# j['data']
|
||||
# if not j['success']:
|
||||
# print('Error')
|
||||
# print(r)
|
||||
# print(j)
|
||||
# sleep(1)
|
||||
# continue
|
||||
# update_authority = j['data']['updateAuthority']
|
||||
# collectionId = j['data']['collectionId']
|
||||
# collection = j['data']['collection']
|
||||
# offset = 0
|
||||
# limit = 500
|
||||
# while True:
|
||||
# print(offset)
|
||||
# url = 'https://api.solscan.io/collection/nft?sortBy=nameDec&collectionId={}&offset={}&limit={}'.format(collectionId, offset, limit)
|
||||
# r = requests.get(url)
|
||||
# js = r.json()['data']
|
||||
# offset += limit
|
||||
# if len(js) == 0:
|
||||
# break
|
||||
# for j in js:
|
||||
# data += [[ update_authority, collectionId, collection, row['symbol'], row['name'], row['collection'], j['info']['mint'] ]]
|
||||
# m = j['info']['meta']
|
||||
# m['mint_address'] = j['info']['mint']
|
||||
# m['name'] = row['name']
|
||||
# m['update_authority'] = update_authority
|
||||
# meta += [ m ]
|
||||
# it += 1
|
||||
# end = time.time()
|
||||
# print('Finished {} / {} in {} minutes'.format(it, tot, round((end - start) / 60.0, 1)))
|
||||
|
||||
# old = pd.read_csv('./data/nft_label_tokens.csv')
|
||||
# token_df = pd.DataFrame(data, columns=['update_authority','collectionId','solscan_collection','symbol','name','collection','mint'])
|
||||
# token_df = token_df.append(old).drop_duplicates()
|
||||
# token_df.to_csv('./data/nft_label_tokens.csv', index=False)
|
||||
|
||||
# old = pd.read_csv('./data/nft_label_metadata.csv')
|
||||
# meta_df = pd.DataFrame(meta)
|
||||
# meta_df = meta_df.append(old).drop_duplicates()
|
||||
# meta_df.to_csv('./data/nft_label_metadata.csv', index=False)
|
||||
# seen = list(token_df.name.unique())
|
||||
|
||||
# m_df.to_csv('~/Downloads/tmp.csv', index=False)
|
||||
# tmp[tmp.collection > 1]
|
||||
# m_df.head()
|
||||
# def f(x):
|
||||
# x = re.sub('\(|\)', '', x)
|
||||
# x = re.sub(' ', '_', x)
|
||||
# x = re.sub('\'', '', x)
|
||||
# return(x)
|
||||
# m_df['collection'] = m_df.name.apply(lambda x: f(x) )
|
||||
|
||||
# x = 'asf (asf)'
|
||||
# f(x)
|
||||
|
||||
# query = '''
|
||||
# WITH base AS (
|
||||
# SELECT *
|
||||
# , ROW_NUMBER() OVER (PARTITION BY project_name ORDER BY insert_date DESC) AS rn
|
||||
# FROM crosschain.address_labels
|
||||
# WHERE blockchain = 'solana'
|
||||
# AND label_subtype = 'nf_token_contract'
|
||||
# )
|
||||
# SELECT *
|
||||
# FROM base
|
||||
# '''
|
||||
|
||||
# examples = ctx.cursor().execute(query)
|
||||
# examples = pd.DataFrame.from_records(iter(examples), columns=[x[0] for x in examples.description])
|
||||
# examples = clean_colnames(examples)
|
||||
# examples.head()
|
||||
# examples[examples.address_name == 'paradisedao'].head()
|
||||
# examples[examples.address == 'GUXSatf5AAFKmuQgSgn4GoGzBEhwJ9WAQRxeVt1vZvkb'].head()
|
||||
# # m_df = pd.read_csv('./data/me_update_authorities.csv')
|
||||
# # fix = m_df[m_df.n_collection > 1].merge(examples[[ 'address','address_name' ]].rename(columns={'address_name':'name'}) )
|
||||
# fix = m_df[m_df.n_collection > 1].merge(examples[[ 'address','address_name' ]].rename(columns={'address_name':'name'}) )
|
||||
# len(m_df[m_df.n_collection > 1].name.unique())
|
||||
# len(fix.name.unique())
|
||||
|
||||
# j = list(fix.address.unique())
|
||||
# with open('./data/fix_mints.json', 'w') as f:
|
||||
# json.dump(j, f)
|
||||
|
||||
# seen = list(examples.address.unique())
|
||||
# seen = []
|
||||
# need = df[-df.mint_address.isin(seen)].sort_values(['collection','mint_address'])
|
||||
# CDgbhX61QFADQAeeYKP5BQ7nnzDyMkkR3NEhYF2ETn1k - taiyo
|
||||
# DC2mkgwhy56w3viNtHDjJQmc7SGu2QX785bS4aexojwX - DAA
|
||||
# DRGNjvBvnXNiQz9dTppGk1tAsVxtJsvhEmojEfBU3ezf - Boryoku
|
||||
# 7hYkx2CNGRB8JE7X7GefX1ak1dqe7GxgYKbpfj9moE9D - mindfolk
|
||||
# CjwNEVQFKk8YzZLCvvw6sNrjxiQW8dYDSzhTph18T7g5 - mindfolk
|
||||
need = fix.copy().rename(columns={'name':'collection'})
|
||||
# need = need.drop_duplicates(subset=['update_authority']).sort_values('collection').head(7).tail(1)
|
||||
need = need.drop_duplicates(subset=['update_authority']).sort_values('collection')
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('\|', '-', x).strip() )
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('\)', '', x).strip() )
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('\(', '', x).strip() )
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('\'', '', x).strip() )
|
||||
need.collection.unique()
|
||||
# need = need.drop_duplicates(subset=['collection']).sort_values('collection')
|
||||
n = 0
|
||||
# 1310 - 310
|
||||
# need = need.tail(n).head(300).tail(25)
|
||||
# need = need.tail(1009).head(17)
|
||||
# need = need.tail(1009 - 17).head(17)
|
||||
# 1-285, 1310-975
|
||||
len(need)
|
||||
# print(n)
|
||||
|
||||
mfiles = ['./data/mints/{}/{}_mint_accounts.json'.format(re.sub(' |-', '_', collection), update_authority) for collection, update_authority in zip(need.collection.values, need.update_authority.values) ]
|
||||
seen = [ x for x in mfiles if os.path.exists(x) ]
|
||||
seen = []
|
||||
|
||||
# for update authorities that have only 1 collection, we can just check metaboss once
|
||||
rpc = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
|
||||
# need = need.tail(400)
|
||||
it = 0
|
||||
tot = len(need)
|
||||
for row in need.iterrows():
|
||||
it += 1
|
||||
row = row[1]
|
||||
collection = row['collection']
|
||||
print('#{} / {}: {}'.format(it, tot, collection))
|
||||
# if collection in seen:
|
||||
# continue
|
||||
update_authority = row['update_authority']
|
||||
# print('Working on {}...'.format(collection))
|
||||
collection_dir = re.sub(' |-', '_', collection)
|
||||
|
||||
dir = './data/mints/{}/'.format(collection_dir)
|
||||
mfile = '{}{}_mint_accounts.json'.format(dir, update_authority)
|
||||
if not os.path.exists(dir):
|
||||
print(collection)
|
||||
os.makedirs(dir)
|
||||
# elif len(os.listdir(dir)) and os.path.exists(mfile):
|
||||
# print('Already have {}.'.format(collection))
|
||||
# print('Seen')
|
||||
# continue
|
||||
seen.append(update_authority)
|
||||
os.system('metaboss -r {} -t 300 snapshot mints --update-authority {} --output {}'.format(rpc, update_authority, dir))
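A sketch of the same metaboss invocation via subprocess instead of os.system (same flags as above; an explicit argument list avoids shell quoting issues):

import subprocess
# Equivalent to the os.system() call above, argument for argument.
subprocess.run(
    ['metaboss', '-r', rpc, '-t', '300', 'snapshot', 'mints',
     '--update-authority', update_authority, '--output', dir],
    check=True
)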
|
||||
|
||||
# write the mints to csv
|
||||
data = []
|
||||
for path in os.listdir('./data/mints/'):
|
||||
if os.path.isdir('./data/mints/'+path):
|
||||
collection = re.sub('_', ' ', path).strip()
|
||||
for fname in os.listdir('./data/mints/'+path):
|
||||
f = './data/mints/'+path+'/'+fname
|
||||
if os.path.isfile(f) and '.json' in f:
|
||||
with open(f) as file:
|
||||
j = json.load(file)
|
||||
for m in j:
|
||||
data += [[ collection, m ]]
|
||||
df = pd.DataFrame(data, columns=['collection','mint_address'])
|
||||
df.collection.unique()
|
||||
df.to_csv('./data/single_update_auth_labels.csv', index=False)
|
||||
|
||||
################################
|
||||
# Multiple Authorities #
|
||||
################################
|
||||
rpc = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
|
||||
need = list(m_df[ (m_df.seen == 0) & (m_df.n_auth > 1) ].update_authority.unique())
|
||||
need = m_df[m_df.update_authority.isin(need)]
|
||||
fix = need.merge(l_df[[ 'name','mint_address' ]])
|
||||
need = fix.copy().rename(columns={'name':'collection'})
|
||||
need = need.sort_values('collection').drop_duplicates(subset=['update_authority'], keep='first')
|
||||
i = 5
|
||||
sz = 112
|
||||
t = len(need) - (sz * (i - 1)) if sz * i > len(need) else sz
|
||||
print(t)
|
||||
need = need.head(sz * i).tail(t)
|
||||
# need = need.head(150 * 2).tail(150)
|
||||
# need = need.head(150 * 3).tail(150)
|
||||
# need = need.head(150 * 4).tail(150)
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('\|', '-', x).strip() )
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('\)', '', x).strip() )
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('\(', '', x).strip() )
|
||||
need['collection'] = need.collection.apply(lambda x: re.sub('\'', '', x).strip() )
|
||||
need.collection.unique()
|
||||
it = 0
|
||||
a = []
|
||||
print(i)
|
||||
for row in need.iterrows():
|
||||
it += 1
|
||||
# if it < 20:
|
||||
# continue
|
||||
# if it % 100 == 0:
|
||||
# print('#{}/{}'.format(it, len(m_df)))
|
||||
print('#{}/{}'.format(it, len(need)))
|
||||
row = row[1]
|
||||
collection = row['collection']
|
||||
if collection in seen:
|
||||
continue
|
||||
update_authority = row['update_authority']
|
||||
print('Working on {}...'.format(collection))
|
||||
collection_dir = re.sub(' |-', '_', collection)
|
||||
|
||||
dir = './data/mints/{}/'.format(collection_dir)
|
||||
mfile = '{}{}_mint_accounts.json'.format(dir, update_authority)
|
||||
if not os.path.exists(dir):
|
||||
print(collection)
|
||||
os.makedirs(dir)
|
||||
# elif len(os.listdir(dir)) and os.path.exists(mfile):
|
||||
# print('Already have {}.'.format(collection))
|
||||
# print('Seen')
|
||||
# continue
|
||||
print('LETS GOOO')
|
||||
a.append(update_authority)
|
||||
os.system('metaboss -r {} -t 300 snapshot mints --update-authority {} --output {}'.format(rpc, update_authority, dir))
|
||||
|
||||
# len(need)
|
||||
# len(need.drop_duplicates(subset=['mint_address']))
|
||||
# len(need.collection.unique())
|
||||
# tot = len(need.collection.unique())
|
||||
# it = 0
|
||||
# # for each collection, get all the mints from metaboss
|
||||
# for c in need.collection.unique():
|
||||
# it += 1
|
||||
# print('#{} / {}: {}'.format(it, tot, c))
|
||||
# dir = './data/fix_labels_1/{}/'.format(re.sub(' ', '_', c))
|
||||
odir = dir+'output/'
|
||||
# if not os.path.exists(dir):
|
||||
# print('Making dir {}'.format(dir))
|
||||
# os.makedirs(dir)
|
||||
if not os.path.exists(odir):
|
||||
print('Making dir {}'.format(odir))
|
||||
os.makedirs(odir)
|
||||
# elif os.path.exists(dir+'mints.json'):
|
||||
# print('Already Seen')
|
||||
# continue
|
||||
# ms = list(need[need.collection == c].mint_address.unique())
|
||||
# with open(dir+'mints.json', 'w') as f:
|
||||
# json.dump(ms, f)
|
||||
os.system('metaboss -r {} -t 300 decode mint --list-file {} --output {}'.format(rpc, mfile, odir ))
|
||||
|
||||
##################################################
|
||||
# Load All The Mints for Each Collection #
|
||||
##################################################
|
||||
# now that we have the mints, create a data frame with the info for each mint in each collection
|
||||
data = []
|
||||
seen = [ x[1] for x in data ]
|
||||
it = 0
|
||||
dirs = os.listdir('./data/mints/')
|
||||
for path in dirs:
|
||||
print(it)
|
||||
it += 1
|
||||
if os.path.isdir('./data/mints/'+path):
|
||||
collection = re.sub('_', ' ', path).strip()
|
||||
if not os.path.exists('./data/mints/'+path+'/output/'):
|
||||
continue
|
||||
fnames = os.listdir('./data/mints/'+path+'/output/')
|
||||
print(collection, len(fnames))
|
||||
for fname in fnames:
|
||||
f = './data/mints/'+path+'/output/'+fname
|
||||
if fname[:-5] in seen:
|
||||
continue
|
||||
if os.path.isfile(f) and '.json' in f:
|
||||
try:
|
||||
with open(f) as file:
|
||||
j = json.load(file)
|
||||
data += [[ collection, fname, j['name'], j['symbol'], j['uri'] ]]
|
||||
except:
|
||||
print('Error {}'.format(fname[:-5]))
|
||||
|
||||
##################################################
|
||||
# Load All The Mints for Each Collection #
|
||||
##################################################
|
||||
new_mints = pd.DataFrame(data, columns=['collection','mint_address','name','symbol','uri'])
|
||||
# tmp = tmp[-(tmp.collection.isin(['Dskullys','Decimusdynamics']))]
|
||||
n = len(new_mints[(new_mints.uri.isnull()) | (new_mints.uri == '')])
|
||||
tot = len(new_mints)
|
||||
pct = round(n * 100 / tot, 1)
|
||||
print('{} ({}%) rows have no uri'.format(n, pct))
|
||||
new_mints = new_mints[new_mints.uri != '']
|
||||
|
||||
# function to clean the name of each NFT (remove the number)
|
||||
def f_cn(x):
|
||||
if not x or x != x:
|
||||
return(x)
|
||||
if '#' in x[-6:]:
|
||||
x = ''.join(re.split('#', x)[:-1]).strip()
|
||||
elif bool(re.match('.+\s+[0-9]+', x)):
|
||||
x = ' '.join(re.split(' ', x)[:-1]).strip()
|
||||
return(x)
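A short sketch of f_cn's behaviour on hypothetical inputs:

assert f_cn('DeGod #1234') == 'DeGod'          # trailing '#<number>' stripped
assert f_cn('Famous Fox 42') == 'Famous Fox'   # trailing bare number stripped
assert f_cn(None) is None                      # missing values pass through unchanged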
|
||||
new_mints['clean_name'] = new_mints.name.apply(lambda x: f_cn(x) )
|
||||
|
||||
# determine, for each collection, whether to key on collection-name-symbol, collection-symbol, or just collection when resolving which collection a mint actually belongs to
# rationale: some collections have only a few distinct names, so we can iterate over the name combinations, but others give every NFT its own name, in which case we assume all of its mints belong to the same collection
|
||||
a = new_mints.drop_duplicates(subset=['collection','clean_name','symbol']).groupby(['collection']).uri.count().reset_index().sort_values('uri', ascending=0)
|
||||
symbol_only = a[a.uri > 10].collection.unique()
|
||||
b = new_mints[new_mints.collection.isin(symbol_only)].drop_duplicates(subset=['collection','symbol']).groupby(['collection']).uri.count().reset_index().sort_values('uri', ascending=0)
|
||||
collection_only = b[b.uri > 10].collection.unique()
|
||||
|
||||
# now get the info for each collection-name-symbol combo
|
||||
g1 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].groupby(['collection','clean_name','symbol']).head(1).reset_index()
|
||||
g2 = new_mints[ ((new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].groupby(['collection','symbol']).head(1).reset_index()
|
||||
g3 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & ((new_mints.collection.isin(collection_only))) ].groupby(['collection']).head(1).reset_index()
|
||||
g = g1.append(g2).append(g3).drop_duplicates(subset=['mint_address'])
|
||||
print('{} Total: {} all, {} collection-symbol {} collection'.format(len(g), len(g1), len(g2), len(g3)))
|
||||
g.to_csv('~/Downloads/tmp-g.csv', index=False)
|
||||
|
||||
# iterate over each row to get what collection they are actually in
|
||||
# by pulling data from the uri
|
||||
uri_data = []
|
||||
it = 0
|
||||
tot = len(g)
|
||||
print(tot)
|
||||
errs = []
|
||||
seen = [ x['uri'] for x in uri_data ]
|
||||
# for row in g.iterrows():
|
||||
for row in g[ -(g.uri.isin(seen)) ].iterrows():
|
||||
row = row[1]
|
||||
it += 1
|
||||
if it % 100 == 0:
|
||||
uri_df = pd.DataFrame(uri_data)[[ 'collection','name','symbol','row_symbol','row_collection','uri','row_clean_name','mint_address' ]]
|
||||
uri_df.to_csv('~/Downloads/uri_df.csv', index=False)
|
||||
print('#{} / {}: {}'.format(it, tot, row['collection']))
|
||||
try:
|
||||
r = requests.get(row['uri'])
|
||||
j = r.json()
|
||||
j['uri'] = row['uri']
|
||||
j['row_collection'] = row['collection']
|
||||
j['row_clean_name'] = row['clean_name']
|
||||
j['row_symbol'] = row['symbol']
|
||||
j['mint_address'] = row['mint_address']
|
||||
uri_data += [j]
|
||||
except:
|
||||
print('Error')
|
||||
errs.append(row)
|
||||
uri_df = pd.DataFrame(uri_data)[[ 'collection','name','symbol','row_symbol','row_collection','uri','row_clean_name','mint_address' ]]
|
||||
uri_df.to_csv('~/Downloads/uri_df.csv', index=False)
|
||||
|
||||
# for each row, parse the json from the uri
|
||||
uri_df = pd.read_csv('~/Downloads/uri_df.csv')
|
||||
def f(x, c):
|
||||
x = str(x)
|
||||
try:
|
||||
n = json.loads(re.sub("'", "\"", x))[c]
|
||||
if type(n) == list:
|
||||
return(n[0])
|
||||
return(n)
|
||||
except:
|
||||
try:
|
||||
return(json.loads(re.sub("'", "\"", x))[c])
|
||||
except:
|
||||
try:
|
||||
return(json.loads(re.sub("'", "\"", x))[0][c])
|
||||
except:
|
||||
try:
|
||||
return(json.loads(re.sub("'", "\"", x))[0])
|
||||
except:
|
||||
return(x)
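The nested try/except chain above works around the single-quoted, Python-style dict strings stored in these columns. A hypothetical alternative using ast.literal_eval (a sketch, not a drop-in replacement; it does not reproduce every fallback branch above):

import ast

def parse_field(x, key):
    # Parse the stringified Python literal (dict, list, or plain string) and
    # pull out `key`, falling back to the raw value when that is not possible.
    try:
        v = ast.literal_eval(str(x))
    except (ValueError, SyntaxError):
        return x
    if isinstance(v, list) and v:
        v = v[0]
    return v.get(key, x) if isinstance(v, dict) else x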
|
||||
# parse the json more
|
||||
uri_df['parsed_collection'] = uri_df.collection.apply(lambda x: f(x, 'name') )
|
||||
uri_df['parsed_family'] = uri_df.collection.apply(lambda x: f(x, 'family') )
|
||||
uri_df['clean_name'] = uri_df.name.apply( lambda x: f_cn(x) )
|
||||
# calculate what the collection name is
|
||||
uri_df['use_collection'] = uri_df.parsed_collection.replace('', None).fillna( uri_df.clean_name )#.fillna( uri_df.row_symbol )
|
||||
uri_df[uri_df.use_collection == 'nan'][['use_collection','parsed_collection','parsed_family','clean_name','name','collection','symbol','row_symbol','row_collection']].head()
|
||||
uri_df[uri_df.use_collection == 'nan'][['use_collection','parsed_collection','parsed_family','clean_name','name','collection','symbol','row_symbol','row_collection']].to_csv('~/Downloads/tmp.csv', index=False)
|
||||
len(uri_df)
|
||||
|
||||
# clean the collection name
|
||||
def f1(x):
|
||||
try:
|
||||
if len(x['use_collection']) == 1:
|
||||
return(x['clean_name'])
|
||||
if bool(re.match('.+\s+#[0-9]+', x['use_collection'])):
|
||||
return(''.join(re.split('#', x['use_collection'])[:-1]).strip())
|
||||
if '{' in x['use_collection']:
|
||||
return(x['clean_name'])
|
||||
return(x['use_collection'].strip().title())
|
||||
except:
|
||||
return(x['use_collection'].strip().title())
|
||||
uri_df['tmp'] = uri_df.apply(lambda x: f1(x), 1 )
|
||||
uri_df[uri_df.tmp == 'Nan'][['use_collection','tmp']]
|
||||
uri_df['use_collection'] = uri_df.apply(lambda x: f1(x), 1 )
|
||||
sorted(uri_df.use_collection.unique())[:20]
|
||||
sorted(uri_df.use_collection.unique())[-20:]
|
||||
|
||||
# clean the mint_address
|
||||
uri_df['mint_address'] = uri_df.mint_address.apply(lambda x: re.sub('.json','', x))
|
||||
uri_df.head()
|
||||
uri_df = uri_df.fillna('None')
|
||||
|
||||
for i in range(2):
|
||||
# for each collection-name-symbol combo, see how many have multiple mappings
|
||||
a = uri_df.copy().fillna('None')
|
||||
a = a[['row_collection','row_clean_name','row_symbol','use_collection']].drop_duplicates().groupby(['row_collection','row_clean_name','row_symbol']).use_collection.count().reset_index().rename(columns={'use_collection':'n_1'})
|
||||
uri_df = merge(uri_df, a, ensure=True)
|
||||
|
||||
# for each collection-symbol combo, see how many have multiple mappings
|
||||
a = uri_df.copy().fillna('None')
|
||||
a = a[['row_collection','row_symbol','use_collection']].drop_duplicates().groupby(['row_collection','row_symbol']).use_collection.count().reset_index().rename(columns={'use_collection':'n_2'})
|
||||
uri_df = merge(uri_df, a, ensure=True)
|
||||
|
||||
# for each collection combo, see how many have multiple mappings
|
||||
a = uri_df.copy().fillna('None')
|
||||
a = a[['row_collection','use_collection']].drop_duplicates().groupby(['row_collection']).use_collection.count().reset_index().rename(columns={'use_collection':'n_3'})
|
||||
uri_df = merge(uri_df, a, ensure=True)
|
||||
|
||||
uri_df['n'] = uri_df.apply(lambda x: x['n_3'] if x['row_collection'] in collection_only else x['n_2'] if x['row_collection'] in symbol_only else x['n_1'], 1 )
|
||||
print('{} / {} ({}%) have multiple collection-name-symbol mappings'.format(len(uri_df[uri_df.n > 1]), len(uri_df), round( 100.0 * len(uri_df[uri_df.n > 1]) / len(uri_df))))
|
||||
|
||||
# if there are multiple mappings, use the parsed_family instead of the use_collection
|
||||
uri_df['use_collection'] = uri_df.apply(lambda x: x['use_collection'] if x['n'] == 1 else x['parsed_family'], 1 )
|
||||
del uri_df['n_1']
|
||||
del uri_df['n_2']
|
||||
del uri_df['n_3']
|
||||
|
||||
# only take rows where there is a single mapping
|
||||
m = uri_df[uri_df.n==1][[ 'use_collection','row_collection','row_clean_name','row_symbol' ]].dropna().drop_duplicates()
|
||||
m.columns = [ 'use_collection','collection','clean_name','symbol' ]
|
||||
|
||||
m_1 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].fillna('').merge(m.fillna(''), how='left')
|
||||
m_2 = new_mints[ ((new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ][[ 'collection','mint_address','symbol' ]].fillna('').merge(m.fillna(''), how='left')
|
||||
m_3 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & ((new_mints.collection.isin(collection_only))) ][[ 'collection','mint_address' ]].fillna('').merge(m.fillna(''), how='left')
|
||||
len(m_1) + len(m_2) + len(m_3)
|
||||
len(new_mints)
|
||||
# m = new_mints.fillna('').merge(m.fillna(''), how='left')
|
||||
m = m_1.append(m_2).append(m_3)
|
||||
print('After all this, we have {}% of the mints'.format( round(len(m) * 100 / len(new_mints)) ))
|
||||
len(new_mints)
|
||||
len(m)
|
||||
m['mint_address'] = m.mint_address.apply(lambda x: re.sub('.json', '', x) )
|
||||
m = m[['mint_address','use_collection']].dropna().drop_duplicates()
|
||||
m.columns = ['mint_address','collection']
|
||||
|
||||
m[m.collection.isnull()].head()
|
||||
m[m.collection=='Nan'].head()
|
||||
|
||||
m = m[m.collection != 'Nan']
|
||||
|
||||
tmp = m.groupby('collection').mint_address.count().reset_index().sort_values('mint_address', ascending=0)
|
||||
tmp.head()
|
||||
|
||||
m.to_csv('./data/mult_update_auth_labels.csv', index=False)
|
||||
################
|
||||
# DONE #
|
||||
################
|
||||
|
||||
|
||||
|
||||
tokens = m.append(pd.read_csv('./data/tokens.csv')[['collection','mint_address']]).drop_duplicates(subset=['mint_address'], keep='last')
|
||||
tokens.to_csv('./data/mints-2022-06-13-2pm.csv', index=False)
|
||||
|
||||
tokens.head()
|
||||
|
||||
m.to_csv('./data/mints-2022-06-09.csv', index=False)
|
||||
m = pd.read_csv('./data/mints-2022-06-09.csv')
|
||||
m.groupby('collection').head(1).to_csv('~/Downloads/tmp.csv', index=False)
|
||||
len(m)
|
||||
len(m.mint_address.unique())
|
||||
m.head()
|
||||
m.head()
|
||||
# m = m.merge(symbol_map, how='left', on='symbol')
|
||||
# m['use_collection'] = m.use_collection_x.fillna(m.use_collection_y)
|
||||
len(new_mints)
|
||||
len(m)
|
||||
len(m[m.use_collection.isnull()])
|
||||
len(m[m.use_collection.isnull()]) / len(m)
|
||||
len(m[m.use_collection_x.isnull()]) / len(m)
|
||||
m[m.use_collection.isnull()].fillna('').drop_duplicates(subset=['collection','clean_name','symbol']).to_csv('~/Downloads/tmp-3.csv', index=False)
|
||||
m[m.use_collection.isnull()].drop_duplicates(subset=['collection']).to_csv('~/Downloads/tmp-3.csv', index=False)
|
||||
|
||||
a = uri_df[(uri_df.parsed_collection.isnull()) | (uri_df.parsed_collection == '')].groupby('row_clean_name').uri.count().reset_index()
|
||||
a = uri_df[(uri_df.parsed_collection.isnull()) | (uri_df.parsed_collection == '')]
|
||||
uri_df.head()
|
||||
uri_df['row_clean_name'] = uri_df.row_clean_name.apply(lambda x: f_cn(x) )
|
||||
id_map = uri_df
|
||||
a.to_csv('~/Downloads/tmp-1.csv', index=False)
|
||||
len(uri_df)
|
||||
n = uri_df.groupby()
|
||||
uri_df
|
||||
uri_df
|
||||
uri_df.head()
|
||||
uri_df[['symbol','collection','']]
|
||||
uri_df.head()
|
||||
|
||||
query = '''
|
||||
SELECT DISTINCT project_name
|
||||
@ -294,6 +1057,26 @@ def mints_from_me():
|
||||
[x for x in seen if not x in m_df.tmp.unique()][:11]
|
||||
m_df[m_df.symbol == 'apesquad']
|
||||
m_df[m_df.symbol == 'chimp_frens']
|
||||
url = 'https://api.solscan.io/nft/detail?mint=D5pT5HYPeQkHD6ryoHxnc2jdcUMYmjs6sS6LswbSDsuy'
|
||||
us = sorted(m_df[m_df.n_collection > 1].update_authority.unique())
|
||||
u = us[1]
|
||||
m_df[m_df.update_authority == u]
|
||||
m_df[m_df.mint == 'G3xiAFZEp49BJc8nNrDJxwTXZ34teKH7CRf5KTGakxte']
|
||||
data = []
|
||||
for u in us[:10]:
|
||||
nfts = BLOCKCHAIN_API_RESOURCE.search_nfts(
|
||||
update_authority = u
|
||||
, update_authority_search_method = SearchMethod.EXACT_MATCH
|
||||
)
|
||||
print(u, len(nfts))
|
||||
for n in nfts:
|
||||
m = n['nft_metadata']
|
||||
data += [[ m['update_authority'], m['mint'], m['data']['symbol'], m['data']['name'] ]]
|
||||
nft_df = pd.DataFrame(data, columns=['update_authority','mint','symbol','name'])
|
||||
len(nft_df.update_authority.unique())
|
||||
nft_df['collection'] = nft_df.name.apply(lambda x: re.split('#', x)[0].strip() )
|
||||
nft_df.groupby(['symbol','collection']).mint.count()
|
||||
nft_df.groupby(['symbol','name']).mint.count()
|
||||
print(len(seen))
|
||||
# m_df = m_df.merge(lp_df)
|
||||
len(m_df)
|
||||
@ -335,7 +1118,6 @@ def mints_from_me():
|
||||
# os.makedirs(dir_mints)
|
||||
# os.system('metaboss -r {} -t 300 decode mint --list-file {} --output {}'.format(rpc, fname, dir_mints))
|
||||
|
||||
|
||||
data = []
|
||||
for path in os.listdir('./data/mints/'):
|
||||
if os.path.isdir('./data/mints/'+path):
|
||||
|
||||
Binary file not shown.
143 prepare_data.py
@ -1,12 +1,132 @@
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import pandas as pd
|
||||
import snowflake.connector
|
||||
|
||||
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
|
||||
|
||||
from solana_model import get_sales
|
||||
from scrape_sol_nfts import clean_name
|
||||
|
||||
def get_ctx():
|
||||
usr = os.getenv('SNOWFLAKE_USR')
|
||||
pwd = os.getenv('SNOWFLAKE_PWD')
|
||||
# with open('snowflake.pwd', 'r') as f:
|
||||
# pwd = f.readlines()[0].strip()
|
||||
# with open('snowflake.usr', 'r') as f:
|
||||
# usr = f.readlines()[0].strip()
|
||||
|
||||
ctx = snowflake.connector.connect(
|
||||
user=usr,
|
||||
password=pwd,
|
||||
account='vna27887.us-east-1'
|
||||
)
|
||||
return(ctx)
|
||||
|
||||
def clean_colnames(df):
|
||||
names = [ x.lower() for x in df.columns ]
|
||||
df.columns = names
|
||||
return(df)
|
||||
|
||||
def overlap():
|
||||
query = '''
|
||||
WITH sales AS (
|
||||
SELECT l.label AS collection, SUM(sales_amount) AS volume, MIN(block_timestamp::date) AS first_sale_date
|
||||
FROM solana.fact_nft_sales s
|
||||
JOIN solana.dim_labels l ON LOWER(l.address) = LOWER(s.mint)
|
||||
WHERE block_timestamp >= CURRENT_DATE - 30
|
||||
GROUP BY 1
|
||||
), base AS (
|
||||
SELECT *
|
||||
, ROW_NUMBER() OVER (ORDER BY volume DESC) AS volume_rank
|
||||
FROM sales
|
||||
ORDER BY volume DESC
|
||||
LIMIT 50
|
||||
), b2 AS (
|
||||
SELECT DISTINCT collection, first_sale_date, volume_rank, purchaser, mint
|
||||
FROM solana.fact_nft_sales s
|
||||
JOIN solana.dim_labels l ON LOWER(l.address) = LOWER(s.mint)
|
||||
JOIN base b ON b.collection = l.label
|
||||
UNION
|
||||
SELECT DISTINCT collection, first_sale_date, volume_rank, purchaser, mint
|
||||
FROM solana.fact_nft_mints m
|
||||
JOIN solana.dim_labels l ON LOWER(l.address) = LOWER(m.mint)
|
||||
JOIN base b ON b.collection = l.label
|
||||
)
|
||||
SELECT DISTINCT INITCAP(collection) AS collection, first_sale_date, date_trunc('month', first_sale_date) AS first_sale_month, volume_rank, purchaser, mint
|
||||
FROM b2
|
||||
'''
|
||||
ctx = get_ctx()
|
||||
df = ctx.cursor().execute(query)
|
||||
df = pd.DataFrame.from_records(iter(df), columns=[x[0] for x in df.description])
|
||||
df = clean_colnames(df)
|
||||
df[df.collection == 'okay bears']
|
||||
len(df[df.collection == 'okay bears'].mint.unique())
|
||||
data = []
|
||||
# list(df.collection.unique()).index(a)  # exploratory; `a` is only defined inside the loop below
|
||||
# list(df.collection.unique()).index(b)  # exploratory; `b` is only defined inside the loop below
|
||||
cur = df[df.volume_rank <= 50]
|
||||
for a in cur.collection.unique():
|
||||
print(a)
|
||||
a1 = set(cur[cur.collection == a].purchaser.unique())
|
||||
ar = cur[cur.collection == a].volume_rank.values[0]
|
||||
am = cur[cur.collection == a].first_sale_month.values[0]
|
||||
# for b in cur[cur.collection > a].collection.unique():
|
||||
for b in cur.collection.unique():
|
||||
b1 = set(cur[cur.collection == b].purchaser.unique())
|
||||
br = cur[cur.collection == b].volume_rank.values[0]
|
||||
bm = cur[cur.collection == b].first_sale_month.values[0]
|
||||
data += [[ a, b, int(a < b), am, bm, ar, br, len(a1), len(b1), len(a1.intersection(b1)) ]]
|
||||
cur = pd.DataFrame(data, columns=['col_1','col_2','include','am','bm','r_1','r_2','n_1','n_2','n_int'])
|
||||
cur['pct'] = cur.apply(lambda x: x['n_int'] / min(x['n_1'], x['n_2']), 1 )
|
||||
cur = cur[cur.n_int.notnull()]
|
||||
cur.to_csv('~/Downloads/overlap.csv', index=False)
|
||||
cur.include.unique()
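# Illustrative sketch (not part of the original script): the overlap pct above is the
# count of shared purchasers divided by the size of the smaller buyer set; wallet ids
# below are made up for the example.
a1_demo = {'w1', 'w2', 'w3'}                        # purchasers of collection A
b1_demo = {'w2', 'w3', 'w4', 'w5'}                  # purchasers of collection B
pct_demo = len(a1_demo & b1_demo) / min(len(a1_demo), len(b1_demo))   # 2 / 3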
|
||||
|
||||
|
||||
def add_back_metadata():
|
||||
query = '''
|
||||
SELECT *
|
||||
FROM solana.dim_nft_metadata
|
||||
WHERE LOWER(project_name) IN (
|
||||
'degods'
|
||||
, 'astrals'
|
||||
, 'solstein'
|
||||
, 'solgods'
|
||||
, 'okay bears'
|
||||
, 'meerkat millionaires'
|
||||
, 'catalina whale mixer'
|
||||
, 'citizens by solsteads'
|
||||
, 'defi pirates'
|
||||
)
|
||||
'''
|
||||
ctx = get_ctx()
|
||||
mdf = ctx.cursor().execute(query)
|
||||
mdf = pd.DataFrame.from_records(iter(mdf), columns=[x[0] for x in mdf.description])
|
||||
print('Loaded {} metadata'.format(len(mdf)))
|
||||
mdf = clean_colnames(mdf)
|
||||
mdf = mdf[[ 'contract_name','token_id','token_metadata' ]]
|
||||
# m = json.loads(mdf.token_metadata.values)  # raises TypeError on an array; superseded by the list comprehension below
|
||||
m = [json.loads(x) for x in mdf.token_metadata.values]
|
||||
data = []
|
||||
collection = mdf.contract_name.values
|
||||
token_id = mdf.token_id.values
|
||||
for i in range(len(m)):
|
||||
for k, v in m[i].items():
|
||||
data += [[ collection[i], token_id[i], k, v ]]
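# e.g. a token_metadata value of '{"Background": "Blue", "Fur": "Gold"}' for token 123 of
# 'DeGods' becomes two long-format rows, ['DeGods', 123, 'Background', 'Blue'] and
# ['DeGods', 123, 'Fur', 'Gold'] (attribute names and values here are illustrative only)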
|
||||
old = pd.read_csv('./data/metadata.csv')
|
||||
metadata = pd.DataFrame(data, columns=['collection','token_id','feature_name','feature_value'])
|
||||
del old['chain']
|
||||
old = old.append(metadata)
|
||||
old['collection'] = old.collection.apply(lambda x: clean_name(x) )
|
||||
old = old.drop_duplicates(subset=['collection','token_id','feature_name'], keep='last')
|
||||
old[old.collection == 'Cets On Creck'].feature_name.unique()
|
||||
old[old.collection == 'Cets on Creck'].feature_name.unique()
|
||||
tmp = old[['collection','feature_name']].drop_duplicates().groupby('collection').feature_name.count().reset_index()
|
||||
tmp.to_csv('~/Downloads/tmp-1.csv', index=False)
|
||||
old.to_csv('./data/metadata.csv', index=False)
|
||||
|
||||
def add_sf_metadata():
|
||||
old = pd.read_csv('./data/metadata.csv')
|
||||
l0 = len(old)
|
||||
@ -141,8 +261,25 @@ def add_att_count():
|
||||
print('Adding {} rows'.format(l1 - l0))
|
||||
m_df.to_csv('./data/metadata.csv', index=False)
|
||||
|
||||
|
||||
def tmp():
|
||||
m1 = pd.read_csv('./data/metadata.csv')
|
||||
m2 = pd.read_csv('./data/metadata_2.csv')
|
||||
t1 = pd.read_csv('./data/tokens.csv')
|
||||
t2 = pd.read_csv('./data/tokens_2.csv')
|
||||
m = m1.append(m2).drop_duplicates(keep='last')
|
||||
t = t1.append(t2).drop_duplicates(keep='last')
|
||||
t.to_csv('./data/tokens.csv', index=False)
|
||||
m.to_csv('./data/metadata.csv', index=False)
|
||||
|
||||
def add_rarities():
|
||||
include = [ 'DeGods' ]
|
||||
m_df = pd.read_csv('./data/metadata.csv')
|
||||
# m_df = m_df[-m_df.collection.isin([''])]
|
||||
g0 = m_df.groupby('collection').token_id.count().reset_index()
|
||||
|
||||
m_df['collection'] = m_df.collection.apply(lambda x: clean_name(x))
|
||||
# m_df = m_df[m_df.collection.isin(include)]
|
||||
# m_df['feature_name'] = m_df.feature_name.fillna(m_df.name)
|
||||
# m_df['feature_value'] = m_df.feature_value.fillna(m_df.value)
|
||||
for c in [ 'name','value','rarity' ]:
|
||||
@ -164,6 +301,8 @@ def add_rarities():
|
||||
# m_df[m_df.collection == 'BAYC'].feature_name.unique()
|
||||
|
||||
tokens = pd.read_csv('./data/tokens.csv')[['collection','token_id','nft_rank']]
|
||||
tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x))
|
||||
# tokens = tokens[tokens.collection.isin(include)]
|
||||
tokens[((tokens.collection == 'Pesky Penguins')) & (tokens.token_id=='6437')]
|
||||
tokens[((tokens.collection == 'Pesky Penguins')) & (tokens.token_id==6437)]
|
||||
tokens[tokens.collection == 'SOLGods']
|
||||
@ -287,6 +426,10 @@ def add_rarities():
|
||||
sorted(m_df.collection.unique())
|
||||
|
||||
l1 = len(m_df)
|
||||
g1 = m_df.groupby('collection').token_id.count().reset_index()
|
||||
g = g0.merge(g1, how='outer', on=['collection'])
|
||||
g['dff'] = g.token_id_y - g.token_id_x
|
||||
print(g[g.dff != 0].sort_values('dff', ascending=0))
|
||||
print('Adding {} rows'.format(l1 - l0))
|
||||
# m_df[m_df.collection == 'Galactic Angels']
|
||||
# m_df[ (m_df.collection == 'Galactic Angels') & (m_df.token_id == '1') ]
|
||||
|
||||
@ -23,7 +23,7 @@ import cloudscraper
|
||||
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
|
||||
os.environ['PATH'] += os.pathsep + '/Users/kellenblumberg/shared/'
|
||||
|
||||
from utils import clean_token_id, merge, clean_name
|
||||
from utils import clean_token_id, get_ctx, merge, clean_name
|
||||
|
||||
# howrare.is api
|
||||
# https://api.howrare.is/v0.1/collections/smb/only_rarity
|
||||
@ -34,66 +34,28 @@ from utils import clean_token_id, merge, clean_name
|
||||
# old = pd.read_csv('./data/tokens.csv')
|
||||
# metadata[(metadata.collection == 'Galactic Punks') & (metadata.feature_name=='attribute_count')].drop_duplicates(subset=['feature_value']).merge(old)
|
||||
|
||||
# url = 'https://api.solscan.io/collection/nft?sortBy=nameDec&collectionId=f046bec0889c9d431ce124a626237e2236bc2527051d32ed31f6b5e6dc230669&offset=0&limit=500'
|
||||
# r = requests.get(url)
|
||||
# j = r.json()
|
||||
# j.keys()
|
||||
# len(j['data'])
|
||||
# j['data'][0]
|
||||
|
||||
def how_rare_is_api():
|
||||
url = 'https://api.howrare.is/v0.1/collections'
|
||||
r = requests.get(url)
|
||||
j = r.json()
|
||||
j['result'].keys()
|
||||
j['result']['data'][:10]
|
||||
c_df = pd.DataFrame(j['result']['data']).sort_values('floor_marketcap', ascending=0)
|
||||
c_df.head(16)
|
||||
seen = [ 'smb','aurory','degenapes','thugbirdz','degods','okay_bears','catalinawhalemixer','cetsoncreck','stonedapecrew','solgods' ]
|
||||
len(j['result']['data'])
|
||||
t_data = []
|
||||
metadata = pd.DataFrame()
|
||||
d = {
|
||||
'Degen Apes': 'degenapes'
|
||||
, 'Pesky Penguins': 'peskypenguinclub'
|
||||
, 'Aurory': 'aurory'
|
||||
, 'Solana Monkey Business': 'smb'
|
||||
, 'Thugbirdz': 'thugbirdz'
|
||||
}
|
||||
# for collection, url in d.items():
|
||||
# redo trippin ape tribe
|
||||
for row in c_df.iterrows():
|
||||
row = row[1]
|
||||
collection = row['name']
|
||||
url = row['url'][1:]
|
||||
print('Working on collection {}, {}, {}'.format(collection, len(t_data), len(metadata)))
|
||||
if url in seen or (len(metadata) and collection in metadata.collection.unique()):
|
||||
print('Seen!')
|
||||
continue
|
||||
# collection = 'Cets on Creck'
|
||||
# collection = 'SOLGods'
|
||||
# collection = 'Meerkat Millionaires'
|
||||
# collection = d['url'][1:]
|
||||
# url = 'https://api.howrare.is/v0.1/collections'+d['url']
|
||||
# url = 'https://api.howrare.is/v0.1/collections/meerkatmillionaires'
|
||||
url = 'https://api.howrare.is/v0.1/collections/'+url
|
||||
r = requests.get(url)
|
||||
j = r.json()
|
||||
for i in j['result']['data']['items']:
|
||||
token_id = int(i['id'])
|
||||
nft_rank = int(i['rank'])
|
||||
mint = i['mint']
|
||||
image = i['image']
|
||||
t_data += [[ collection, token_id, nft_rank, mint, image ]]
|
||||
# m_data += [[ collection, token_id, nft_rank ]]
|
||||
m = pd.DataFrame(i['attributes'])
|
||||
m['token_id'] = token_id
|
||||
m['collection'] = collection
|
||||
# metadata = metadata.append(m)
|
||||
metadata = pd.concat([metadata, m])
|
||||
old = pd.read_csv('./data/tokens.csv')
|
||||
|
||||
def add_to_df(t_data):
|
||||
old = pd.read_csv('./data/tokens_2.csv')
|
||||
sorted(old.collection.unique())
|
||||
l0 = len(old)
|
||||
do_merge = False
|
||||
tokens = pd.DataFrame(t_data, columns=['collection','token_id','nft_rank','mint_address','image_url'])
|
||||
len(tokens)
|
||||
tokens[tokens.nft_rank.isnull()]
|
||||
tokens['collection'] = tokens.collection.apply(lambda x: 'Catalina Whale Mixer' if x == 'Catalina Whales' else x )
|
||||
metadata['collection'] = metadata.collection.apply(lambda x: 'Catalina Whale Mixer' if x == 'Catalina Whales' else x )
|
||||
rem = [ 'Jikan Studios','Fine Fillies' ]
|
||||
print(tokens.groupby('collection').token_id.count())
|
||||
tokens['clean_token_id'] = tokens.token_id
|
||||
tokens['chain'] = 'Solana'
|
||||
tokens = tokens[-tokens.collection.isin(rem)]
|
||||
if do_merge:
|
||||
old['token_id'] = old.token_id.astype(str)
|
||||
tokens['token_id'] = tokens.token_id.astype(str)
|
||||
@ -106,42 +68,230 @@ def how_rare_is_api():
|
||||
old['clean_token_id'] = old.clean_token_id.fillna(old.token_id)
|
||||
old['chain'] = old.chain.fillna('Solana')
|
||||
else:
|
||||
old = old.append(tokens)
|
||||
# old = old.append(tokens)
|
||||
old = pd.concat( [old, tokens] )
|
||||
old['token_id'] = old.token_id.astype(str)
|
||||
old = old.drop_duplicates(subset=['collection','token_id'], keep='last')
|
||||
print('Adding {} rows'.format(len(old) - l0))
|
||||
old[old.collection.isin(tokens.collection.unique())]
|
||||
old[(old.collection.isin(tokens.collection.unique())) & (old.token_id == '6437')]
|
||||
old[old.nft_rank.isnull()].groupby('collection').token_id.count()
|
||||
old = old[-old.collection.isin(['Astrals','Dazedducks','Nyanheroes','Shadowysupercoder','Taiyorobotics'])]
|
||||
old.to_csv('./data/tokens_2.csv', index=False)
|
||||
# tokens.to_csv('./data/tokens_2.csv', index=False)
|
||||
|
||||
def compile():
|
||||
ctx = get_ctx()
|
||||
query = 'SELECT DISTINCT address FROM silver_CROSSCHAIN.ADDRESS_LABELS'
|
||||
seen = ctx.cursor().execute(query)
|
||||
seen = pd.DataFrame.from_records(iter(seen), columns=[x[0] for x in seen.description])
|
||||
seen = sorted(list(seen.ADDRESS.unique()))
|
||||
|
||||
tokens = pd.read_csv('./data/tokens.csv')
|
||||
tokens = tokens[tokens.chain == 'Solana']
|
||||
single_update_auth_labels = pd.read_csv('./data/single_update_auth_labels.csv')
|
||||
mult_update_auth_labels = pd.read_csv('./data/mult_update_auth_labels.csv')
|
||||
df = tokens.append(single_update_auth_labels).append(mult_update_auth_labels)
|
||||
df = df[ (df.collection != 'Nan') & (df.collection != 'nan') & (df.collection.notnull()) ]
|
||||
df = df[-(df.mint_address.isin(seen))]
|
||||
df = df.drop_duplicates(subset=['mint_address'], keep='first')
|
||||
# len(df)
|
||||
# len(df.collection.unique())
|
||||
# df.head()
|
||||
# df.mint_address.tail(11000).head(5)
|
||||
# df[df.mint_address == '2GgPNKGyzAQL4mriuH4kBpntYCNVSM2pQfzdsu3p8du5']
|
||||
# df['seen'] = df.mint_address.isin(seen).astype(int)
|
||||
# tmp = df[df.seen == 0].groupby('collection').mint_address.count().reset_index().sort_values('mint_address', ascending=0)
|
||||
# tmp.head(40)
|
||||
# tmp.mint_address.sum()
|
||||
df[df.mint_address.isnull()]
|
||||
df[['mint_address','collection']].to_csv('~/Downloads/solana-nft-labels-06-29.csv', index=False)
|
||||
|
||||
|
||||
def add_to_df(t_data, metadata, exclude_new = False):
|
||||
old = pd.read_csv('./data/tokens.csv')
|
||||
g0 = old.groupby('collection').token_id.count().reset_index()
|
||||
sorted(old.collection.unique())
|
||||
l0 = len(old)
|
||||
do_merge = False
|
||||
tokens = pd.DataFrame(t_data, columns=['collection','token_id','nft_rank','mint_address','image_url'])
|
||||
len(tokens)
|
||||
tokens[tokens.nft_rank.isnull()]
|
||||
tokens['collection'] = tokens.collection.apply(lambda x: 'Catalina Whale Mixer' if x == 'Catalina Whales' else x )
|
||||
# rem = [ 'Jikan Studios','Fine Fillies' ]
|
||||
# print(tokens.groupby('collection').token_id.count())
|
||||
metadata['collection'] = metadata.collection.apply(lambda x: 'Catalina Whale Mixer' if x == 'Catalina Whales' else x )
|
||||
tokens['clean_token_id'] = tokens.token_id
|
||||
tokens['chain'] = 'Solana'
|
||||
# tokens = tokens[-tokens.collection.isin(rem)]
|
||||
# metadata = metadata[-metadata.collection.isin(rem)]
|
||||
if do_merge:
|
||||
old['token_id'] = old.token_id.astype(str)
|
||||
tokens['token_id'] = tokens.token_id.astype(str)
|
||||
old = old.merge(tokens, how='left', on=['collection','token_id'])
|
||||
old[old.collection == 'Solana Monkey Business']
|
||||
for c in [ 'nft_rank','mint_address','image_url' ]:
|
||||
old[c] = old[c+'_x'].fillna(old[c+'_y'])
|
||||
del old[c+'_x']
|
||||
del old[c+'_y']
|
||||
old['clean_token_id'] = old.clean_token_id.fillna(old.token_id)
|
||||
old['chain'] = old.chain.fillna('Solana')
|
||||
else:
|
||||
# old = old.append(tokens)
|
||||
old['collection'] = old.collection.apply(lambda x: clean_name(x))
|
||||
tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x))
|
||||
if exclude_new:
|
||||
rem = tokens.collection.unique()
|
||||
old = old[-(old.collection.isin(rem))]
|
||||
old = pd.concat( [old, tokens] )
|
||||
old['token_id'] = old.token_id.astype(str)
|
||||
old = old.drop_duplicates(subset=['collection','token_id'], keep='last')
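# keep='last' means the freshly scraped howrare.is rows (concatenated after the existing
# tokens.csv contents) win whenever a (collection, token_id) pair already exists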
|
||||
g1 = old.groupby('collection').token_id.count().reset_index()
|
||||
g = g0.merge(g1, how='outer', on=['collection']).fillna(0)
|
||||
g['dff'] = g.token_id_y - g.token_id_x
|
||||
print(g[g.dff != 0].sort_values('dff', ascending=0))
|
||||
g[g.dff != 0].sort_values('dff', ascending=0).to_csv('~/Downloads/tmp.csv', index=False)
|
||||
print('Adding {} rows'.format(len(old) - l0))
|
||||
old = old[old.collection != 'Solanamonkeybusiness (Smb)']
|
||||
# old[old.collection.isin(tokens.collection.unique())]
|
||||
# old[(old.collection.isin(tokens.collection.unique())) & (old.token_id == '6437')]
|
||||
old[old.nft_rank.isnull()].groupby('collection').token_id.count()
|
||||
# old = old[-old.collection.isin(['Astrals','Dazedducks','Nyanheroes','Shadowysupercoder','Taiyorobotics'])]
|
||||
old.to_csv('./data/tokens.csv', index=False)
|
||||
# tokens.to_csv('./data/tokens_2.csv', index=False)
|
||||
|
||||
old = pd.read_csv('./data/metadata.csv')
|
||||
a = old[['collection','token_id']].drop_duplicates()
|
||||
a['exclude'] = 0
|
||||
a['token_id'] = a.token_id.astype(str)
|
||||
metadata['token_id'] = metadata.token_id.astype(str)
|
||||
m = metadata.merge(a, how='left')
|
||||
m = m[m.exclude.isnull()]
|
||||
len(m[m.exclude.isnull()].token_id.unique())
|
||||
del m['exclude']
|
||||
# old = old[-(old.collection == 'Meerkat Millionaires Cc')]
|
||||
print(sorted(old.collection.unique()))
|
||||
g0 = old.groupby('collection').token_id.count().reset_index()
|
||||
l0 = len(old)
|
||||
metadata.collection.unique()
|
||||
old['collection'] = old.collection.apply(lambda x: clean_name(x))
|
||||
metadata['collection'] = metadata.collection.apply(lambda x: clean_name(x))
|
||||
if exclude_new:
|
||||
rem = metadata.collection.unique()
|
||||
old = old[-(old.collection.isin(rem))]
|
||||
# old = old[-old.collection.isin(['Astrals','Dazedducks','Nyanheroes','Shadowysupercoder','Taiyorobotics'])]
|
||||
# a = old[['collection','token_id']].drop_duplicates()
|
||||
# a['exclude'] = 0
|
||||
# a['token_id'] = a.token_id.astype(str)
|
||||
# metadata['token_id'] = metadata.token_id.astype(str)
|
||||
# m = metadata.merge(a, how='left')
|
||||
# m = m[m.exclude.isnull()]
|
||||
# len(m[m.exclude.isnull()].token_id.unique())
|
||||
# del m['exclude']
|
||||
# old = old[-(old.collection == 'Meerkat Millionaires Cc')]
|
||||
# print(sorted(old.collection.unique()))
|
||||
# metadata.collection.unique()
|
||||
# metadata = pd.DataFrame(t_data, columns=['collection','token_id','nft_rank','mint_address','image_url'])
|
||||
# old = old.merge(tokens, how='left', on=['collection','token_id'])
|
||||
old = old.append(m[['collection','token_id','name','value']].rename(columns={'name':'feature_name','value':'feature_value'}) )
|
||||
# old = old.append(m[['collection','token_id','name','value']].rename(columns={'name':'feature_name','value':'feature_value'}) )
|
||||
old = pd.concat( [old, metadata[['collection','token_id','name','value']].rename(columns={'name':'feature_name','value':'feature_value'})] )
|
||||
old['token_id'] = old.token_id.astype(str)
|
||||
# old = old.drop_duplicates(subset=['collection','token_id','feature_name'])  # default keep='first' would discard the newly appended rows; the keep='last' call below is the intended dedup
|
||||
old = old.drop_duplicates(subset=['collection','token_id','feature_name'], keep='last')
|
||||
# old['nft_rank'] = old.nft_rank_y.fillna(old.nft_rank_y)
|
||||
# del old['nft_rank_x']
|
||||
g1 = old.groupby('collection').token_id.count().reset_index()
|
||||
g = g0.merge(g1, how='outer', on=['collection']).fillna(0)
|
||||
g['dff'] = g.token_id_y - g.token_id_x
|
||||
print(g[g.dff != 0].sort_values('dff', ascending=0))
|
||||
# del old['nft_rank_y']
|
||||
print('Adding {} rows'.format(len(old) - l0))
|
||||
print(old.groupby('collection').token_id.count())
|
||||
old[old.collection.isin(metadata.collection.unique())]
|
||||
old[(old.collection == 'Catalina Whale Mixer') & (old.token_id == '1206')]
|
||||
# print(old.groupby('collection').token_id.count())
|
||||
# old[old.collection.isin(metadata.collection.unique())]
|
||||
# old[(old.collection == 'Catalina Whale Mixer') & (old.token_id == '1206')]
|
||||
old.to_csv('./data/metadata.csv', index=False)
|
||||
# metadata.to_csv('./data/metadata_2.csv', index=False)
|
||||
|
||||
def how_rare_is_api():
|
||||
ctx = get_ctx()
|
||||
query = '''
|
||||
SELECT DISTINCT LOWER(project_name) AS lower_collection
|
||||
FROM solana.core.dim_nft_metadata
|
||||
'''
|
||||
df = ctx.cursor().execute(query)
|
||||
df = pd.DataFrame.from_records(iter(df), columns=[x[0] for x in df.description])
|
||||
|
||||
url = 'https://api.howrare.is/v0.1/collections'
|
||||
r = requests.get(url)
|
||||
j = r.json()
|
||||
j['result'].keys()
|
||||
j['result']['data'][:10]
|
||||
c_df = pd.DataFrame(j['result']['data']).sort_values('floor_marketcap', ascending=0)
|
||||
c_df['lower_collection'] = c_df.url.apply(lambda x: x.lower().strip() )
|
||||
seen = sorted(df.LOWER_COLLECTION.apply(lambda x: re.sub(' |_|\'', '', x) ).values)
|
||||
# seen[:300]
|
||||
# x = 590
|
||||
# seen[x:x+50]
|
||||
c_df['seen_1'] = c_df.url.apply(lambda x: re.sub(' |_|\'', '', x[1:]).lower() in seen ).astype(int)
|
||||
c_df['seen_2'] = c_df.name.apply(lambda x: re.sub(' |_|\'', '', x).lower() in seen ).astype(int)
|
||||
c_df['seen'] = (c_df.seen_1 + c_df.seen_2 > 0).astype(int)
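# the normalization strips spaces, underscores and apostrophes before comparing, e.g.
# re.sub(" |_|'", '', 'okay_bears').lower() == 'okaybears', which is then matched against
# the normalized project names pulled from dim_nft_metadata above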
|
||||
c_df.head()
|
||||
c_df.seen.sum()
|
||||
c_df[c_df.seen == 0].head(10)
|
||||
# c_df.head(16)
|
||||
seen = [ 'smb','aurory','degenapes','thugbirdz','degods','okay_bears','catalinawhalemixer','cetsoncreck','stonedapecrew','solgods' ]
|
||||
c_df = c_df[-(c_df.url.isin([ '/'+x for x in seen]))]
|
||||
# rem = [ 'kaiju','jikanstudios' ]
|
||||
# c_df = c_df[-(c_df.url.isin([ '/'+x for x in rem]))]
|
||||
# seen = list(pd.read_csv('./data/tokens.csv').collection.unique())
|
||||
# c_df = c_df[-(c_df.name.isin(seen))]
|
||||
# len(j['result']['data'])
|
||||
# c_df = c_df[c_df.url.isin(['/blocksmithlabs'])]
|
||||
# c_df = c_df[c_df.url.isin(['/generousrobotsdao','/thestonedfrogs'])]
|
||||
c_df = c_df[c_df.seen == 0]
|
||||
sorted(c_df.url.unique())
|
||||
it = 0
|
||||
tot = len(c_df)
|
||||
# c_df.head()
|
||||
# c_df = c_df[c_df.url != '/midnightpanthers']
|
||||
t_data = []
|
||||
m_data = []
|
||||
# metadata = pd.DataFrame()
|
||||
for row in c_df.iterrows():
|
||||
it += 1
|
||||
row = row[1]
|
||||
collection = row['name']
|
||||
print('#{} / {}: {}'.format(it, tot, collection))
|
||||
url = row['url'][1:]
|
||||
if it > 1:
|
||||
assert(len(t_data))
|
||||
assert(len(m_data))
|
||||
print('Working on collection {}, {}, {}'.format(collection, len(t_data), len(m_data)))
|
||||
# if url in seen or (len(metadata) and collection in metadata.collection.unique()):
|
||||
# print('Seen!')
|
||||
# continue
|
||||
# collection = 'Cets on Creck'
|
||||
# collection = 'SOLGods'
|
||||
# collection = 'Meerkat Millionaires'
|
||||
# collection = d['url'][1:]
|
||||
# url = 'https://api.howrare.is/v0.1/collections'+d['url']
|
||||
# url = 'https://api.howrare.is/v0.1/collections/meerkatmillionaires'
|
||||
# url = 'https://api.howrare.is/v0.1/collections/'+url+'/only_rarity'
|
||||
url = 'https://api.howrare.is/v0.1/collections/'+url
|
||||
r = requests.get(url)
|
||||
j = r.json()
|
||||
for i in j['result']['data']['items']:
|
||||
try:
|
||||
token_id = int(i['id'])
|
||||
if True:
|
||||
nft_rank = int(i['rank'])
|
||||
mint = i['mint']
|
||||
image = i['image']
|
||||
t_data += [[ collection, token_id, nft_rank, mint, image ]]
|
||||
if False:
|
||||
for d in i['attributes']:
|
||||
d['token_id'] = token_id
|
||||
d['collection'] = collection
|
||||
m_data += [ d ]
|
||||
# metadata = metadata.append(m)
|
||||
# metadata = pd.concat([metadata, m])
|
||||
except:
|
||||
print('Error')
|
||||
# add_to_df(t_data)
|
||||
metadata = pd.DataFrame(m_data)
|
||||
metadata
|
||||
|
||||
add_to_df(t_data, metadata, True)
|
||||
metadata.head()
|
||||
metadata.value.unique()
|
||||
|
||||
def convert_collection_names():
|
||||
for c in [ 'pred_price', 'attributes', 'feature_values', 'model_sales', 'listings', 'coefsdf', 'tokens' ]:
|
||||
|
||||
45 scratch.py
@ -1,6 +1,6 @@
|
||||
import os
|
||||
import json
|
||||
# import psycopg2
|
||||
import psycopg2
|
||||
import pandas as pd
|
||||
import requests
|
||||
|
||||
@ -26,6 +26,49 @@ def thorchain():
|
||||
|
||||
def f():
|
||||
conn = psycopg2.connect("dbname=suppliers user=postgres password=postgres")
|
||||
conn = psycopg2.connect("dbname=suppliers user=postgres password=postgres")
|
||||
conn = psycopg2.connect(
|
||||
host="vic5o0tw1w-repl.twtim97jsb.tsdb.cloud.timescale.com",
|
||||
user="tsdbadmin",
|
||||
password="yP4wU5bL0tI0kP3k"
|
||||
)
|
||||
|
||||
query = '''
|
||||
SELECT from_addr
|
||||
, to_addr
|
||||
, asset
|
||||
, amount_e8
|
||||
, block_timestamp
|
||||
, COUNT(1) AS n
|
||||
FROM midgard.transfer_events
|
||||
WHERE block_timestamp < 1650000000000000000
|
||||
AND block_timestamp >= 1640000000000000000
|
||||
GROUP BY 1, 2, 3, 4, 5
|
||||
HAVING COUNT(1) > 1
|
||||
'''
|
||||
df = pd.read_sql_query(query, conn)
|
||||
# cur.execute(query)  # no cursor is defined here; pd.read_sql_query above already runs the query
|
||||
|
||||
it = 0
|
||||
qs = []
|
||||
for i in range(1618000000000000000, 1657000000000000000, 3000000000000000):
|
||||
print(i)
|
||||
it += 1
|
||||
query = '''
|
||||
SELECT from_addr
|
||||
, to_addr
|
||||
, asset
|
||||
, amount_e8
|
||||
, block_timestamp
|
||||
, COUNT(1) AS n
|
||||
FROM midgard.transfer_events
|
||||
WHERE block_timestamp >= {}
|
||||
AND block_timestamp < {}
|
||||
GROUP BY 1, 2, 3, 4, 5
|
||||
HAVING COUNT(1) > 1
|
||||
'''.format(i, i + 3000000000000000)
|
||||
with open('/Users/kellenblumberg/Downloads/query_{}.txt'.format(it), 'w') as f:
|
||||
f.write(query)
|
||||
|
||||
|
||||
def read_tokenlist():
|
||||
|
||||
4915 scratch.sql
File diff suppressed because it is too large
1140 solana_model.py
File diff suppressed because it is too large
20 utils.py
@ -1,5 +1,7 @@
|
||||
import os
|
||||
import re
|
||||
import pandas as pd
|
||||
import snowflake.connector
|
||||
|
||||
|
||||
clean_names = {
|
||||
@ -20,9 +22,26 @@ clean_names = {
|
||||
,'mayc': 'MAYC'
|
||||
,'solgods': 'SOLGods'
|
||||
,'meerkatmillionairescc': 'Meerkat Millionaires'
|
||||
,'ggsg:galacticgeckos': 'Galactic Geckos'
|
||||
,'solstein': 'SolStein'
|
||||
# ,'stonedapecrew': 'Stoned Ape Crew'
|
||||
}
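# e.g. clean_names['solgods'] returns 'SOLGods'; the lookup itself is used outside this hunk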
|
||||
|
||||
def get_ctx():
|
||||
usr = os.getenv('SNOWFLAKE_USR')
|
||||
pwd = os.getenv('SNOWFLAKE_PWD')
|
||||
# with open('snowflake.pwd', 'r') as f:
|
||||
# pwd = f.readlines()[0].strip()
|
||||
# with open('snowflake.usr', 'r') as f:
|
||||
# usr = f.readlines()[0].strip()
|
||||
|
||||
ctx = snowflake.connector.connect(
|
||||
user=usr,
|
||||
password=pwd,
|
||||
account='vna27887.us-east-1'
|
||||
)
|
||||
return(ctx)
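# Typical usage pattern elsewhere in this repo (sketch only):
# ctx = get_ctx()
# cur = ctx.cursor().execute('SELECT 1 AS one')
# df = pd.DataFrame.from_records(iter(cur), columns=[c[0] for c in cur.description])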
|
||||
|
||||
def format_num(x):
|
||||
return('{:,}'.format(round(x, 2)))
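# e.g. format_num(1234.567) returns '1,234.57'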
|
||||
|
||||
@ -48,6 +67,7 @@ def clean_name(name):
|
||||
name = re.sub('-', ' ', name)
|
||||
name = re.sub(' On ', ' on ', name)
|
||||
name = re.sub('Defi ', 'DeFi ', name)
|
||||
# name = re.sub(r'[^a-zA-Z0-9\s]', '', name)
|
||||
return(name)
|
||||
|
||||
|
||||
|
||||
BIN viz/.DS_Store (vendored)
Binary file not shown.
@ -33,6 +33,7 @@ clean_names = {
|
||||
,'solgods': 'SOLGods'
|
||||
,'meerkatmillionairescc': 'Meerkat Millionaires'
|
||||
,'stonedapecrew': 'Stoned Ape Crew'
|
||||
,'stonedapecrew': 'Stoned Ape Crew'
|
||||
}
|
||||
|
||||
def clean_name(name):
|
||||
@ -44,6 +45,7 @@ def clean_name(name):
|
||||
name = re.sub('-', ' ', name)
|
||||
name = re.sub(' On ', ' on ', name)
|
||||
name = re.sub('Defi ', 'DeFi ', name)
|
||||
# name = re.sub(r'[^a-zA-Z0-9\s]', '', name)
|
||||
return(name)
|
||||
|
||||
#########################
|
||||
@ -80,6 +82,7 @@ def clean_token_id(df, data_folder):
|
||||
del df['clean_token_id']
|
||||
return(df)
|
||||
|
||||
# '~/git/nft-deal-score/viz/'
|
||||
def add_sales(query, usr, pwd, do_clean_token_id = False, data_folder = '/rstudio-data/'):
|
||||
fname = data_folder+'nft_deal_score_sales.csv'
|
||||
ctx = get_ctx(usr, pwd)
|
||||
@ -104,7 +107,7 @@ def add_sales(query, usr, pwd, do_clean_token_id = False, data_folder = '/rstudi
|
||||
old[old.token_id.isnull()].groupby('collection').sale_date.count()
|
||||
go = old.groupby('collection').token_id.count().reset_index().rename(columns={'token_id':'n_old'})
|
||||
l0 = len(old)
|
||||
app = old[old.collection.isin(m.collection.unique())].append(m)
|
||||
app = pd.concat([old[old.collection.isin(m.collection.unique())], m])
|
||||
app = app[ app.price > 0 ]
|
||||
app['tmp'] = app.apply(lambda x: x['collection']+str(int(float(x['token_id'])))+x['sale_date'][:10], 1 )
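# fallback dedup key for rows without a tx_id: collection + integer token_id + the first
# ten characters of sale_date (YYYY-MM-DD)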
|
||||
if len(app[app.tx_id.isnull()]):
|
||||
@ -115,7 +118,7 @@ def add_sales(query, usr, pwd, do_clean_token_id = False, data_folder = '/rstudi
|
||||
else:
|
||||
app = app.drop_duplicates(subset=['tx_id'])
|
||||
old = old[-old.collection.isin(app.collection.unique())]
|
||||
old = old.append(app)
|
||||
old = pd.concat([old, app])
|
||||
|
||||
old = old[[ 'collection','token_id','sale_date','price','tx_id' ]]
|
||||
|
||||
@ -142,22 +145,43 @@ def add_solana_sales(usr, pwd, data_folder = '/rstudio-data/'):
|
||||
, sales_amount AS price
|
||||
FROM solana.fact_nft_sales s
|
||||
JOIN solana.dim_nft_metadata m ON LOWER(m.mint) = LOWER(s.mint)
|
||||
WHERE block_timestamp >= CURRENT_DATE - 14
|
||||
WHERE block_timestamp >= CURRENT_DATE - 7
|
||||
AND m.project_name IN (
|
||||
'Astrals',
|
||||
'Aurory',
|
||||
'Cets On Creck',
|
||||
'Blocksmith Labs',
|
||||
'Bohemia',
|
||||
'Bot Head',
|
||||
'Bubblegoose Ballers',
|
||||
'Cat Cartel',
|
||||
'Catalina Whale Mixer',
|
||||
'Cets On Creck',
|
||||
'Citizens by Solsteads',
|
||||
'Communi3: Mad Scientists',
|
||||
'DeFi Pirates',
|
||||
'DeFi Pirates',
|
||||
'DeGods',
|
||||
'Degen Apes',
|
||||
'Degen Dojo',
|
||||
'Doge Capital',
|
||||
'Famous Fox Federation',
|
||||
'GGSG: Galactic Geckos',
|
||||
'Just Ape.',
|
||||
'Looties',
|
||||
'Meerkat Millionaires',
|
||||
'Monkey Baby Business',
|
||||
'Okay Bears',
|
||||
'Pesky Penguins',
|
||||
'Primates',
|
||||
'Quantum Traders',
|
||||
'SOLGods',
|
||||
'SolStein',
|
||||
'Solana Monke Rejects',
|
||||
'Solana Monkey Business',
|
||||
'Solanauts',
|
||||
'Stoned Ape Crew',
|
||||
'Thugbirdz'
|
||||
'Thugbirdz',
|
||||
'Trippin Ape Tribe'
|
||||
)
|
||||
'''
|
||||
add_sales(query, usr, pwd, False, data_folder)
|
||||
@ -172,10 +196,10 @@ def add_ethereum_sales(usr, pwd, data_folder = '/rstudio-data/'):
|
||||
, price
|
||||
, tx_id
|
||||
FROM ethereum.nft_events
|
||||
WHERE project_name IN (
|
||||
'BoredApeYachtClub'
|
||||
, 'MutantApeYachtClub'
|
||||
, 'BoredApeKennelClub'
|
||||
WHERE LOWER(project_name) IN (
|
||||
'boredapeyachtclub'
|
||||
, 'mutantapeyachtclub'
|
||||
, 'boredapekennelclub'
|
||||
)
|
||||
AND price IS NOT NULL
|
||||
AND block_timestamp >= CURRENT_DATE - 14
|
||||
|
||||
@ -5,10 +5,10 @@ account: kellen
|
||||
server: science.flipsidecrypto.xyz
|
||||
hostUrl: https://science.flipsidecrypto.xyz/__api__
|
||||
appId: 114
|
||||
bundleId: 405
|
||||
bundleId: 434
|
||||
url: https://science.flipsidecrypto.xyz/content/67141ada-46fd-4750-a690-0be248c461f3/
|
||||
when: 1654490491.17543
|
||||
lastSyncTime: 1654490491.17545
|
||||
when: 1656289651.71961
|
||||
lastSyncTime: 1656289651.71962
|
||||
asMultiple: FALSE
|
||||
asStatic: FALSE
|
||||
ignoredFiles: scrape_terra_nfts.py|add_sales.py|nft_deal_score_data.RData|nft_deal_score_listings_data.RData|nft_deal_score_sales_data.RData|nft_deal_score_sales.csv|nft_deal_score_listings.csv
|
||||
|
||||
@ -5,10 +5,10 @@ account: kellen
|
||||
server: science.flipsidecrypto.xyz
|
||||
hostUrl: https://science.flipsidecrypto.xyz/__api__
|
||||
appId: 93
|
||||
bundleId: 410
|
||||
bundleId: 435
|
||||
url: https://science.flipsidecrypto.xyz/nft-deal-score/
|
||||
when: 1654526436.70554
|
||||
lastSyncTime: 1654526436.70555
|
||||
when: 1656375887.36944
|
||||
lastSyncTime: 1656375887.36945
|
||||
asMultiple: FALSE
|
||||
asStatic: FALSE
|
||||
ignoredFiles: add_sales.py|data (2).Rdata|data copy 2.Rdata|data copy.Rdata|data.Rdata|exploration.R|loan_score_model.py|nft_deal_score_data.RData|nft_deal_score_listings_data.RData|nft_deal_score_listings.csv|nft_deal_score_sales_data.RData|nft_deal_score_sales.csv|nft_deal_score_tokens.csv|scrape_eth_nfts.py|scrape_terra_nfts.py|update_data.R|update_nft_deal_score_data.RMD|upload_data.R|utils.py
|
||||
|
||||
@ -0,0 +1,14 @@
|
||||
name: update_nft_deal_score_data4
|
||||
title:
|
||||
username: kellen
|
||||
account: kellen
|
||||
server: science.flipsidecrypto.xyz
|
||||
hostUrl: https://science.flipsidecrypto.xyz/__api__
|
||||
appId: 114
|
||||
bundleId: 430
|
||||
url: https://science.flipsidecrypto.xyz/content/67141ada-46fd-4750-a690-0be248c461f3/
|
||||
when: 1656200355.03703
|
||||
lastSyncTime: 1656200355.03704
|
||||
asMultiple: FALSE
|
||||
asStatic: FALSE
|
||||
ignoredFiles: add_sales.py|data (2).Rdata|data copy 2.Rdata|data copy.Rdata|data.Rdata|exploration.R|loan_score_model.py|nft_deal_score_data.RData|nft_deal_score_listings_data.RData|nft_deal_score_listings.csv|nft_deal_score_sales_data.RData|nft_deal_score_sales.csv|nft_deal_score_tokens.csv|scrape_eth_nfts.py|scrape_terra_nfts.py|update_data.R|update_nft_deal_score_data.RMD|upload_data.R|utils.py
|
||||
@ -47,6 +47,7 @@ def clean_name(name):
|
||||
name = re.sub('-', ' ', name)
|
||||
name = re.sub(' On ', ' on ', name)
|
||||
name = re.sub('Defi ', 'DeFi ', name)
|
||||
# name = re.sub(r'[^a-zA-Z0-9\s]', '', name)
|
||||
return(name)
|
||||
|
||||
def scrape_randomearth(data_folder = '/rstudio-data/'):
|
||||
|
||||
@ -3,8 +3,10 @@ server <- function(input, output, session) {
|
||||
user <- Sys.info()[['user']]
|
||||
# options(warn=-1)
|
||||
|
||||
isRstudio <- user != 'kellenblumberg'
|
||||
# isRstudio <- TRUE
|
||||
base_dir <- ifelse(
|
||||
user == 'rstudio-connect'
|
||||
isRstudio
|
||||
, '/rstudio-data/'
|
||||
, ifelse(user == 'fcaster'
|
||||
, '/srv/shiny-server/nft-deal-score/'
|
||||
@ -472,7 +474,7 @@ server <- function(input, output, session) {
|
||||
selectInput(
|
||||
inputId = 'collectionname'
|
||||
, label = NULL
|
||||
, selected = 'Catalina Whale Mixer'
|
||||
, selected = 'Famous Fox Federation'
|
||||
, choices = choices
|
||||
, width = "100%"
|
||||
)
|
||||
@ -561,7 +563,7 @@ updateSelectizeInput(session, 'tokenid', choices = choices, server = TRUE)
|
||||
selected %in% c('Cets on Creck')
|
||||
, strsplit(selected, ' |s ')[[1]][1]
|
||||
, ifelse(
|
||||
selected %in% c('Stoned Ape Crew', 'Catalina Whale Mixer')
|
||||
selected %in% c('Stoned Ape Crew', 'Catalina Whale Mixer','Famous Fox Federation')
|
||||
, paste(strsplit(selected, ' ')[[1]][1], strsplit(selected, ' ')[[1]][2], sep = ' ')
|
||||
, substr(selected, 1, nchar(selected) - 1)
|
||||
)
|
||||
|
||||
6 viz/ui.R
@ -30,9 +30,9 @@ fluidPage(
|
||||
class="hero"
|
||||
, fluidRow(
|
||||
class = "header-images",
|
||||
column(4, uiOutput("solanaimg")),
|
||||
column(4, uiOutput("terraimg")),
|
||||
column(4, uiOutput("ethereumimg"))
|
||||
column(6, uiOutput("solanaimg")),
|
||||
# column(4, uiOutput("terraimg")),
|
||||
column(6, uiOutput("ethereumimg"))
|
||||
)
|
||||
, h1(
|
||||
class="header",
|
||||
|
||||
@ -3,143 +3,76 @@ library(reticulate)
|
||||
library(httr)
|
||||
library(jsonlite)
|
||||
|
||||
user <- Sys.info()[['user']]
|
||||
isRstudio <- user == 'rstudio-connect'
|
||||
|
||||
# nft_deal_score_listings_data.RData
|
||||
.topic = 'prod-nft-metadata-uploads'
|
||||
.key = 'solana-nft-metadata'
|
||||
.url = 'https://kafka-rest-proxy.flipside.systems'
|
||||
|
||||
user <- Sys.info()[['user']]
|
||||
isRstudio <- user %in% c('rstudio-connect','data-science')
|
||||
|
||||
base_dir <- ifelse(
|
||||
user == 'rstudio-connect'
|
||||
isRstudio
|
||||
, '/rstudio-data/'
|
||||
, ifelse(user == 'fcaster'
|
||||
, '/srv/shiny-server/nft-deal-score/'
|
||||
, '~/git/nft-deal-score/viz/'
|
||||
)
|
||||
, '~/git/nft-deal-score/viz/'
|
||||
)
|
||||
# base_dir <- '/srv/shiny-server/nft-deal-score/'
|
||||
listings_file <- paste0(base_dir,'nft_deal_score_listings_data.RData')
|
||||
load(listings_file)
|
||||
|
||||
if(isRstudio) {
|
||||
source('/home/data-science/data_science/util/util_functions.R')
|
||||
source_python('/home/data-science/data_science/nft-deal-score/scrape_terra_nfts.py')
|
||||
source_python('/home/data-science/data_science/viz/nft-deal-score/upload_solana_nft_labels.py')
|
||||
} else {
|
||||
source('~/data_science/util/util_functions.R')
|
||||
source_python(paste0(base_dir, 'scrape_terra_nfts.py'))
|
||||
source_python(paste0(base_dir, 'upload_solana_nft_labels.py'))
|
||||
}
|
||||
|
||||
# py_install('pandas', pip = TRUE)
|
||||
# py_install('cloudscraper', pip = TRUE)
|
||||
# py_install('snowflake-connector-python', pip = TRUE)
|
||||
# cloudscraper <- import('cloudscraper')
|
||||
|
||||
base_dir <- ifelse(
|
||||
user == 'rstudio-connect'
|
||||
, '/rstudio-data/'
|
||||
, ifelse(user == 'fcaster'
|
||||
, '/srv/shiny-server/nft-deal-score/'
|
||||
, '~/git/nft-deal-score/viz/'
|
||||
)
|
||||
)
|
||||
source_python(paste0(base_dir, 'scrape_terra_nfts.py'))
|
||||
source_python(paste0(base_dir, 'add_sales.py'))
|
||||
|
||||
query <- '
|
||||
SELECT DISTINCT project_name AS collection
|
||||
, mint AS tokenMint
|
||||
, token_id
|
||||
FROM solana.dim_nft_metadata
|
||||
'
|
||||
mints <- QuerySnowflake(query)
|
||||
colnames(mints) <- c('collection','tokenMint','token_id')
|
||||
|
||||
# pull terra listings
|
||||
terra_listings <- scrape_randomearth(base_dir)
|
||||
head(terra_listings)
|
||||
unique(terra_listings$collection)
|
||||
#########################
|
||||
# Load NFT Data #
|
||||
#########################
|
||||
mints_from_me()
|
||||
pull_from_metaboss()
|
||||
how_rare_is_api()
|
||||
# saves labels to '/rstudio-data/nft_labels/solana_nft_labels.csv'
|
||||
compile()
|
||||
|
||||
|
||||
get_me_url <- function(collection, offset) {
|
||||
return(paste0('https://api-mainnet.magiceden.dev/v2/collections/',collection,'/listings?offset=',offset,'&limit=20'))
|
||||
}
|
||||
get_smb_url <- function(page) {
|
||||
return(paste0('https://market.solanamonkey.business/api/items?limit=40&page=',page))
|
||||
}
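# e.g. get_me_url('okay_bears', 0) builds
# 'https://api-mainnet.magiceden.dev/v2/collections/okay_bears/listings?offset=0&limit=20'
# and get_smb_url(1) builds 'https://market.solanamonkey.business/api/items?limit=40&page=1'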
|
||||
|
||||
solana_listings <- data.table()
|
||||
|
||||
solana_collections <- c(
|
||||
'okay_bears','the_catalina_whale_mixer','meerkat_millionaires_country_club','solgods','cets_on_creck','stoned_ape_crew','degods','aurory','thugbirdz','solana_monkey_business','degenerate_ape_academy','pesky_penguins'
|
||||
)
|
||||
for(collection in solana_collections) {
|
||||
print(paste0('Working on ', collection, '...'))
|
||||
has_more <- TRUE
|
||||
offset <- 0
|
||||
while(has_more) {
|
||||
Sys.sleep(1)
|
||||
print(paste0('Offset #', offset))
|
||||
url <- get_me_url(collection, offset)
|
||||
response <- GET(url)
|
||||
content <- rawToChar(response$content)
|
||||
content <- fromJSON(content)
|
||||
if( typeof(content) == 'list' ) {
|
||||
content <- rbindlist(content, fill=T)
|
||||
}
|
||||
has_more <- nrow(content) >= 20
|
||||
if(nrow(content) > 0 && length(content) > 0) {
|
||||
df <- merge(content, mints, by=c('tokenMint')) %>% as.data.table()
|
||||
df <- df[, list(collection, token_id, price)]
|
||||
offset <- offset + 20
|
||||
solana_listings <- rbind(solana_listings, df)
|
||||
} else {
|
||||
has_more <- FALSE
|
||||
}
|
||||
###############################
|
||||
# Upload NFT Metadata #
|
||||
###############################
|
||||
files <- list.files(paste0(base_dir, 'nft_labels/metadata/results/'))
|
||||
it <- 0
|
||||
for(f in files) {
|
||||
print(f)
|
||||
results <- read.csv(paste0(base_dir,'/nft_labels/metadata/results/',f))
|
||||
for(r in results$results) {
|
||||
it <- it + 1
|
||||
print(paste0('#',it))
|
||||
out <- tryCatch(
|
||||
{
|
||||
# s <- readChar(fileName, file.info(fileName)$size)
|
||||
s <- r
|
||||
.body <- paste0(
|
||||
'{"records": [{"key": "',.key,'","value":',s,'}]}',
|
||||
collapse = ""
|
||||
)
|
||||
r <- httr::POST(url = paste(.url,"topics",.topic,sep = "/"),
|
||||
add_headers('Content-Type' = "application/vnd.kafka.json.v2+json",
|
||||
'Accept' = "application/vnd.kafka.v2+json, application/vnd.kafka+json, application/json"),
|
||||
body = .body)
|
||||
print(r)
|
||||
},
|
||||
error=function(cond) {
|
||||
print(cond)
|
||||
return(NA)
|
||||
},
|
||||
warning=function(cond) {
|
||||
print(cond)
|
||||
return(NULL)
|
||||
},
|
||||
finally={
|
||||
}
|
||||
)
|
||||
}
|
||||
}
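# Illustrative shape of the Kafka REST payload assembled above (field values are examples):
# {"records": [{"key": "solana-nft-metadata", "value": { ...one metadata result object... }}]}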
|
||||
|
||||
for(collection in c('Solana Monkey Business')) {
|
||||
print(paste0('Working on ', collection, '...'))
|
||||
has_more <- TRUE
|
||||
page <- 1
|
||||
while(has_more) {
|
||||
Sys.sleep(1)
|
||||
print(paste0('Page #', page))
|
||||
url <- get_smb_url(page)
|
||||
response <- GET(url)
|
||||
content <- rawToChar(response$content)
|
||||
content <- fromJSON(content)
|
||||
# content <- rbindlist(content, fill=T)
|
||||
content <- content %>% as.data.table()
|
||||
has_more <- nrow(content) > 0 && 'price' %in% colnames(content)
|
||||
if(has_more) {
|
||||
content <- content[, list(mint, price)]
|
||||
content <- unique(content)
|
||||
content$price <- as.numeric(content$price) / (10^9)
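# the SMB market API appears to report prices in lamports; dividing by 10^9 converts to SOL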
|
||||
has_more <- nrow(content) >= 40
|
||||
colnames(content)[1] <- 'tokenMint'
|
||||
df <- merge(content, mints, by=c('tokenMint')) %>% as.data.table()
|
||||
df <- df[, list(collection, token_id, price)]
|
||||
page <- page + 1
|
||||
solana_listings <- rbind(solana_listings, df)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
head(solana_listings)
|
||||
head(terra_listings)
|
||||
new_listings <- rbind(solana_listings, terra_listings)
|
||||
new_listings <- unique(new_listings)
|
||||
|
||||
# listings <- read.csv('./data/listings.csv') %>% as.data.table()
|
||||
rem <- unique(new_listings$collection)
|
||||
rem
|
||||
listings <- listings[ !(collection %in% eval(rem)), ]
|
||||
listings <- listings[, list(collection, token_id, price)]
|
||||
listings <- rbind(listings, new_listings)
|
||||
listings <- listings[order(collection, price)]
|
||||
listings[, token_id := as.integer(token_id)]
|
||||
|
||||
save(
|
||||
listings
|
||||
, file = listings_file
|
||||
)
|
||||
|
||||
|
||||
@ -51,16 +51,19 @@ library(reticulate)
|
||||
# py_install('cloudscraper', pip = TRUE)
|
||||
# r reticulate python ModuleNotFoundError
|
||||
# print('54')
|
||||
use_python('/opt/python/3.10.4/bin/python')
|
||||
py_install('pandas', pip = TRUE)
|
||||
py_install('snowflake-connector-python', pip = TRUE)
|
||||
|
||||
SD_MULT = 3
|
||||
SD_SCALE = 1.95
|
||||
|
||||
user <- Sys.info()[['user']]
|
||||
isRstudio <- user == 'rstudio-connect'
|
||||
# isRstudio <- user == 'rstudio-connect'
|
||||
isRstudio <- user != 'kellenblumberg'
|
||||
# isRstudio <- TRUE
|
||||
if (isRstudio) {
|
||||
use_python('/opt/python/3.10.4/bin/python')
|
||||
py_install('pandas', pip = TRUE)
|
||||
py_install('snowflake-connector-python', pip = TRUE)
|
||||
}
|
||||
|
||||
base_dir <- ifelse(
|
||||
isRstudio
|
||||
@ -109,6 +112,7 @@ add_ethereum_sales(usr, pwd, base_dir)
|
||||
# read sales data from nft_deal_score_sales.csv
|
||||
raw_sales <- read.csv(paste0(base_dir,'nft_deal_score_sales.csv')) %>% as.data.table()
|
||||
raw_sales <- raw_sales[order(collection, sale_date, price)]
|
||||
unique(raw_sales$collection)
|
||||
|
||||
# calculate the floor price
|
||||
raw_sales <- raw_sales %>%
|
||||
@ -149,7 +153,7 @@ query <- '
|
||||
'
|
||||
mints <- QuerySnowflake(query)
|
||||
colnames(mints) <- c('collection','tokenMint','token_id')
|
||||
mints[ collection == 'Cets On Creck', collection := 'Cets on Creck']
|
||||
# mints[ collection == 'Cets On Creck', collection := 'Cets on Creck']
|
||||
|
||||
# pull terra listings
|
||||
# terra_listings <- scrape_randomearth(base_dir)
|
||||
@ -168,10 +172,75 @@ get_smb_url <- function(page) {
|
||||
solana_listings <- data.table()
|
||||
|
||||
solana_collections <- c(
|
||||
'okay_bears','the_catalina_whale_mixer','meerkat_millionaires_country_club','solgods','cets_on_creck','stoned_ape_crew','degods','aurory','thugbirdz','solana_monkey_business','degenerate_ape_academy','pesky_penguins'
|
||||
'famous_fox_federation'
|
||||
)
|
||||
solana_collections <- c(
|
||||
'okay_bears','the_catalina_whale_mixer','meerkat_millionaires_country_club','solgods','cets_on_creck','stoned_ape_crew','degods','aurory','thugbirdz','solana_monkey_business','degenerate_ape_academy','pesky_penguins'
|
||||
'blocksmith_labs'
|
||||
, 'dazedducks_metagalactic_club'
|
||||
, 'degenerate_trash_pandas'
|
||||
, 'famous_fox_federation'
|
||||
, 'generous_robots_dao'
|
||||
, 'ghostface'
|
||||
, 'ghostface'
|
||||
, 'ghostface_gen_2'
|
||||
, 'portals'
|
||||
, 'smokeheads'
|
||||
, 'theorcs'
|
||||
)
|
||||
|
||||
solana_collections <- c(
|
||||
|
||||
# 'blocksmith_labs'
|
||||
# , 'dazedducks_metagalactic_club'
|
||||
# , 'degenerate_trash_pandas'
|
||||
'famous_fox_federation',
|
||||
# , 'generous_robots_dao'
|
||||
# , 'ghostface'
|
||||
# , 'ghostface_gen_2'
|
||||
# , 'portals'
|
||||
# , 'smokeheads'
|
||||
# , 'theorcs',
|
||||
# 'astrals',
|
||||
'aurory',
|
||||
# 'bohemia_',
|
||||
# 'bothead',
|
||||
'bubblegoose_ballers',
|
||||
# 'cat_cartel',
|
||||
'cets_on_creck',
|
||||
# 'citizens_by_solsteads',
|
||||
# 'communi3',
|
||||
# 'defi_pirates',
|
||||
# 'degendojonft',
|
||||
'degenerate_ape_academy',
|
||||
# 'degenerate_ape_kindergarten',
|
||||
'degods',
|
||||
# 'doge_capital',
|
||||
# 'galactic_gecko_space_garage',
|
||||
# 'justape',
|
||||
# 'looties',
|
||||
# 'marinadechefs',
|
||||
'meerkat_millionaires_country_club',
|
||||
# 'monkey_baby_business',
|
||||
'okay_bears',
|
||||
'pesky_penguins',
|
||||
'portals',
|
||||
'primates',
|
||||
# 'psykerhideouts',
|
||||
# 'quantum_traders',
|
||||
# 'solana_monke_rejects',
|
||||
'solana_monkey_business',
|
||||
# 'solanauts',
|
||||
'solgods',
|
||||
# 'solstein',
|
||||
'stoned_ape_crew',
|
||||
# 'taiyo_infants_incubators',
|
||||
'the_catalina_whale_mixer',
|
||||
# 'the_remnants_',
|
||||
# 'the_tower',
|
||||
# 'the_vaultx_dao',
|
||||
'thugbirdz'
|
||||
# 'trippin_ape_tribe',
|
||||
# 'visionary_studios'
|
||||
)
|
||||
# headers = c(
|
||||
# 'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
|
||||
@ -244,8 +313,6 @@ for(collection in solana_collections) {
|
||||
}
|
||||
}
|
||||
|
||||
solana_listings[order(token_id)]
|
||||
|
||||
for(collection in c('Solana Monkey Business')) {
|
||||
print(paste0('Working on ', collection, '...'))
|
||||
has_more <- TRUE
|
||||
@ -292,6 +359,9 @@ listings <- listings[ !(collection %in% c('LunaBulls','Galactic Punks','Galactic
|
||||
listings <- listings[!is.na(price)]
|
||||
listings <- listings %>% as.data.table()
|
||||
|
||||
sort(unique(listings$collection))
|
||||
# write.csv(unique(listings[, collection]), '~/Downloads/tmp.csv', row.names=F)
|
||||
|
||||
floors <- listings %>%
|
||||
group_by(collection) %>%
|
||||
summarize(cur_floor = min(price)) %>%
|
||||
@ -312,7 +382,7 @@ get_fmp <- function(data, coefsdf, pred_price) {
|
||||
fmp[, fair_market_price := pred_price + abs_chg + (pct_chg * pred_price / floor_price) ]
|
||||
}
|
||||
|
||||
if(TRUE) {
|
||||
if(FALSE) {
|
||||
coefsdf[, tot := lin_coef + log_coef ]
|
||||
coefsdf[, lin_coef := lin_coef / tot]
|
||||
coefsdf[, log_coef := log_coef / tot]
|
||||
@ -434,7 +504,7 @@ if(TRUE) {
|
||||
|
||||
tmp <- tmp[order(-pts)]
|
||||
content <- tmp[ (price < 0.9 * fair_market_price) , head(.SD, 2), by = collection]
|
||||
content <- content[order(-pts)]
|
||||
content <- head(content[order(-pts)], 15)
|
||||
# content <- paste(c(header, content$label, collapse='\n'))
|
||||
|
||||
content <- paste(c(header, content$label), collapse='\n')
|
||||
@ -459,15 +529,17 @@ if(TRUE) {
|
||||
colnames(fmp)[3] <- 'rarity_rank'
|
||||
colnames(fmp)[4] <- 'deal_score_rank'
|
||||
|
||||
for( cur_collection in unique(fmp$collection)) {
|
||||
print(paste0('Working on ',cur_collection, '...'))
|
||||
data <- fmp[collection == eval(cur_collection)]
|
||||
KafkaGeneric(
|
||||
.topic = 'prod-data-science-uploads'
|
||||
, .url = 'https://kafka-rest-proxy.flipside.systems'
|
||||
, .project = paste0('nft-deal-score-rankings-', cur_collection)
|
||||
, .data = data
|
||||
)
|
||||
if (FALSE) {
|
||||
for( cur_collection in unique(fmp$collection)) {
|
||||
print(paste0('Working on ',cur_collection, '...'))
|
||||
data <- fmp[collection == eval(cur_collection)]
|
||||
KafkaGeneric(
|
||||
.topic = 'prod-data-science-uploads'
|
||||
, .url = 'https://kafka-rest-proxy.flipside.systems'
|
||||
, .project = paste0('nft-deal-score-rankings-', cur_collection)
|
||||
, .data = data
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -480,6 +552,7 @@ KafkaGeneric(
|
||||
, .data = data
|
||||
)
|
||||
|
||||
sort(unique(listings$collection))
|
||||
|
||||
save(
|
||||
listings
|
||||
|
||||
566 viz/update_nft_labels.R (new file)
|
||||
---
|
||||
title: "Update NFT Deal Score Data"
|
||||
author: "Kellen"
|
||||
date: "2022-04-20"
|
||||
output: html_document
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = TRUE)
|
||||
```
|
||||
|
||||
## Run Updates
|
||||
|
||||
Hello, I am running this at `r Sys.time()`.
|
||||
|
||||
```{r update}
|
||||
# include all required libraries here
|
||||
#EVEN IF YOU SOURCE util_functions.R
|
||||
#YOU HAVE TO PUT THE LIBRARIES HERE I KNOW SORRY
|
||||
#BUT HERE THEY ALL ARE TO SAVE YOU TIME
|
||||
# install.packages('RCurl')
|
||||
library(RCurl)
|
||||
library(fasttime)
|
||||
library(gridExtra)
|
||||
library(ggplot2)
|
||||
library(data.table)
|
||||
library(reshape2)
|
||||
library(dplyr)
|
||||
library(dbplyr)
|
||||
library(RJSONIO)
|
||||
library(magrittr)
|
||||
library(RJSONIO)
|
||||
library(xts)
|
||||
library(quantmod)
|
||||
library(fTrading)
|
||||
library(curl)
|
||||
library(stringr)
|
||||
library(aws.s3)
|
||||
library(RPostgres)
|
||||
library(odbc)
|
||||
library(httr)
|
||||
library(jsonlite)
|
||||
library(reticulate)
|
||||
|
||||
#NOW COPY EVERYTHING ELSE FROM YOUR CURRENT
|
||||
#update_data.R FILE HERE ---------->
|
||||
# virtualenv_create('pyvenv')
|
||||
# use_virtualenv('pyvenv')
|
||||
# virtualenv_install('pyvenv', 'pandas')
|
||||
# virtualenv_install('pyvenv', 'pandas')
|
||||
# py_install('cloudscraper', pip = TRUE)
|
||||
# r reticulate python ModuleNotFoundError
|
||||
# print('54')
|
||||
|
||||
SD_MULT = 3
|
||||
SD_SCALE = 1.95
|
||||
|
||||
user <- Sys.info()[['user']]
|
||||
# isRstudio <- user == 'rstudio-connect'
|
||||
isRstudio <- user != 'kellenblumberg'
|
||||
# isRstudio <- TRUE
|
||||
if (isRstudio) {
|
||||
use_python('/opt/python/3.10.4/bin/python')
|
||||
py_install('pandas', pip = TRUE)
|
||||
py_install('snowflake-connector-python', pip = TRUE)
|
||||
}
|
||||
|
||||
base_dir <- ifelse(
|
||||
isRstudio
|
||||
, '/rstudio-data/'
|
||||
, ifelse(user == 'fcaster'
|
||||
, '/srv/shiny-server/nft-deal-score/'
|
||||
, '~/git/nft-deal-score/viz/'
|
||||
)
|
||||
)
|
||||
|
||||
if(isRstudio) {
|
||||
source('/home/data-science/data_science/util/util_functions.R')
|
||||
source('/home/data-science/data_science/util/kafka_utils.R')
|
||||
source_python('/home/data-science/data_science/viz/nft-deal-score/add_sales.py')
|
||||
source_python('~/upload_solana_nft_labels.py')
|
||||
} else {
|
||||
source('~/data_science/util/util_functions.R')
|
||||
source('~/data_science/util/kafka_utils.R')
|
||||
source_python(paste0(base_dir, 'scrape_terra_nfts.py'))
|
||||
source_python(paste0(base_dir, 'add_sales.py'))
|
||||
}
|
||||
|
||||
usr <- readLines(file.path(base.path,"data_science/util/snowflake.usr"))
|
||||
pwd <- readLines(file.path(base.path,"data_science/util/snowflake.pwd"))
|
||||
|
||||
load(paste0(base_dir,'nft_deal_score_data.RData'))
|
||||
|
||||
listings_file <- paste0(base_dir,'nft_deal_score_listings_data.RData')
|
||||
sales_file <- paste0(base_dir,'nft_deal_score_sales_data.RData')
|
||||
load(listings_file)
|
||||
|
||||
|
||||
coefsdf[, tot := lin_coef + log_coef ]
|
||||
coefsdf[, lin_coef := lin_coef / tot]
|
||||
coefsdf[, log_coef := log_coef / tot]
|
||||
sum(coefsdf$log_coef) + sum(coefsdf$lin_coef)
|
||||
|
||||
# write sales data to nft_deal_score_sales.csv
|
||||
add_solana_sales(usr, pwd, base_dir)
|
||||
add_ethereum_sales(usr, pwd, base_dir)
|
||||
# add_terra_sales(usr, pwd, base_dir)
|
||||
|
||||
# read sales data from nft_deal_score_sales.csv
|
||||
raw_sales <- read.csv(paste0(base_dir,'nft_deal_score_sales.csv')) %>% as.data.table()
|
||||
raw_sales <- raw_sales[order(collection, sale_date, price)]
|
||||
unique(raw_sales$collection)
|
||||
|
||||
# calculate the floor price
|
||||
raw_sales <- raw_sales %>%
|
||||
group_by(collection) %>%
|
||||
mutate(mn_20=lag(price, 1)) %>%
|
||||
as.data.table()
|
||||
|
||||
raw_sales <- raw_sales %>%
|
||||
group_by(collection) %>%
|
||||
mutate(rolling_floor=rollapply(mn_20, width = 20, FUN = "quantile", p = .0575, na.pad = TRUE, align = 'right')) %>%
|
||||
as.data.table()
|
||||
|
||||
raw_sales[, rolling_floor := nafill(rolling_floor, type = "nocb")]
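# rolling_floor approximates the floor as the 5.75th percentile of the previous 20 lagged
# sale prices within each collection; nafill(type = "nocb") back-fills rows where the
# 20-sale window is not yet complete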
|
||||
|
||||
|
||||
# calculate the fair market price
|
||||
tmp <- merge( raw_sales[, list(collection, token_id, sale_date, price, tx_id, rolling_floor)], coefsdf, by=c('collection') )
|
||||
tmp <- merge( tmp, pred_price, by=c('collection','token_id') )
|
||||
tmp[, abs_chg := (rolling_floor - floor_price) * lin_coef ]
|
||||
tmp[, pct_chg := (rolling_floor - floor_price) * log_coef ]
|
||||
tmp[, fair_market_price := pred_price + abs_chg + (pct_chg * pred_price / floor_price) ]
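# Worked example with made-up numbers: if pred_price = 100, floor_price = 80,
# rolling_floor = 90, lin_coef = 0.6 and log_coef = 0.4, then abs_chg = 10 * 0.6 = 6,
# pct_chg = 10 * 0.4 = 4, and fair_market_price = 100 + 6 + 4 * 100 / 80 = 111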
|
||||
|
||||
# save to an .RData file
|
||||
sales <- tmp[, list(collection, token_id, sale_date, price, nft_rank, fair_market_price, rolling_floor)]
|
||||
colnames(sales) <- c('collection', 'token_id', 'block_timestamp', 'price', 'nft_rank', 'pred', 'mn_20')
|
||||
save(
|
||||
sales
|
||||
, file = sales_file
|
||||
)
|
||||
|
||||
|
||||
# load the mints
|
||||
query <- '
|
||||
SELECT DISTINCT project_name AS collection
|
||||
, mint AS tokenMint
|
||||
, token_id
|
||||
FROM solana.dim_nft_metadata
|
||||
'
|
||||
mints <- QuerySnowflake(query)
|
||||
colnames(mints) <- c('collection','tokenMint','token_id')
|
||||
# mints[ collection == 'Cets On Creck', collection := 'Cets on Creck']
|
||||
|
||||
# pull terra listings
|
||||
# terra_listings <- scrape_randomearth(base_dir)
|
||||
# head(terra_listings)
|
||||
# unique(terra_listings$collection)
|
||||
|
||||
|
||||
# 9c39e05c-db3c-4f3f-ac48-84099111b813
|
||||
get_me_url <- function(collection, offset) {
|
||||
return(paste0('https://api-mainnet.magiceden.dev/v2/collections/',collection,'/listings?offset=',offset,'&limit=20'))
|
||||
}
|
||||
get_smb_url <- function(page) {
|
||||
return(paste0('https://market.solanamonkey.business/api/items?limit=40&page=',page))
|
||||
}
|
||||
|
||||
solana_listings <- data.table()
|
||||
|
||||
solana_collections <- c(
  'famous_fox_federation'
)
solana_collections <- c(
  'blocksmith_labs'
  , 'dazedducks_metagalactic_club'
  , 'degenerate_trash_pandas'
  , 'famous_fox_federation'
  , 'generous_robots_dao'
  , 'ghostface'
  , 'ghostface_gen_2'
  , 'portals'
  , 'smokeheads'
  , 'theorcs'
)

solana_collections <- c(
  # 'blocksmith_labs'
  # , 'dazedducks_metagalactic_club'
  # , 'degenerate_trash_pandas'
  'famous_fox_federation',
  # , 'generous_robots_dao'
  # , 'ghostface'
  # , 'ghostface_gen_2'
  # , 'portals'
  # , 'smokeheads'
  # , 'theorcs',
  # 'astrals',
  'aurory',
  # 'bohemia_',
  # 'bothead',
  'bubblegoose_ballers',
  # 'cat_cartel',
  'cets_on_creck',
  # 'citizens_by_solsteads',
  # 'communi3',
  # 'defi_pirates',
  # 'degendojonft',
  'degenerate_ape_academy',
  # 'degenerate_ape_kindergarten',
  'degods',
  # 'doge_capital',
  # 'galactic_gecko_space_garage',
  # 'justape',
  # 'looties',
  # 'marinadechefs',
  'meerkat_millionaires_country_club',
  # 'monkey_baby_business',
  'okay_bears',
  'pesky_penguins',
  'portals',
  'primates',
  # 'psykerhideouts',
  # 'quantum_traders',
  # 'solana_monke_rejects',
  'solana_monkey_business',
  # 'solanauts',
  'solgods',
  # 'solstein',
  'stoned_ape_crew',
  # 'taiyo_infants_incubators',
  'the_catalina_whale_mixer',
  # 'the_remnants_',
  # 'the_tower',
  # 'the_vaultx_dao',
  'thugbirdz'
  # 'trippin_ape_tribe',
  # 'visionary_studios'
)
# headers = c(
#   'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
# )
for(collection in solana_collections) {
  print(paste0('Working on ', collection, '...'))
  has_more <- TRUE
  has_err <- FALSE
  offset <- 0
  while(has_more) {
    Sys.sleep(1)
    out <- tryCatch(
      {
        print(paste0('Offset #', offset))
        url <- get_me_url(collection, offset)
        response <- GET(
          url = url
          # , add_headers(.headers = c('Authorization'= 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'))
          , add_headers('Authorization'= 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813')
        )
        # r <- content(response, as = 'parsed')
        content <- rawToChar(response$content)
        content <- fromJSON(content)
        if( !is.data.frame(content) ) {
          content <- rbindlist(content, fill=T)
        }
        has_more <- nrow(content) > 0
        if(nrow(content) > 0 && length(content) > 0) {
          # content <- data.table(content)
          df <- merge(content, mints, by=c('tokenMint')) %>% as.data.table()
          # if(nrow(df) > 0) {
          #   print(min(df$price))
          # }
          df <- df[, list(collection, token_id, price)]
          solana_listings <- rbind(solana_listings, df)
        } else {
          has_more <- FALSE
        }
        offset <- offset + 20
        has_err <- FALSE
      },
      error=function(cond) {
        print(paste0('Error: ', cond))
        return(TRUE)
        # has_more <- FALSE
        # if(has_err) {
        #   has_err <- FALSE
        #   has_more <- FALSE
        #   return(TRUE)
        # } else {
        #   Sys.sleep(15)
        #   has_err <- TRUE
        #   return(FALSE)
        # }
        # return(TRUE)
      },
      warning=function(cond) {
        print(paste0('Warning: ', cond))
        return(TRUE)
      },
      finally={
        # return(TRUE)
        # print(paste0('Finally'))
      }
    )
    if(out) {
      offset <- offset + 20
      # has_more <- FALSE
    }
  }
}
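# A minimal sketch (not used by the pipeline) of the per-page fetch the loop
# above performs; fetch_me_page is a hypothetical helper name. It assumes the
# same httr/jsonlite calls used throughout this file.
if (FALSE) {
  fetch_me_page <- function(collection, offset) {
    response <- GET(
      url = get_me_url(collection, offset)
      , add_headers('Authorization' = 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813')
    )
    page <- fromJSON(rawToChar(response$content))
    if (!is.data.frame(page)) page <- rbindlist(page, fill = TRUE)
    return(page)  # zero rows signals the last page
  }
  head(fetch_me_page('degods', 0))
}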
for(collection in c('Solana Monkey Business')) {
  print(paste0('Working on ', collection, '...'))
  has_more <- TRUE
  page <- 1
  while(has_more) {
    Sys.sleep(1)
    print(paste0('Page #', page))
    url <- get_smb_url(page)
    response <- GET(url)
    content <- rawToChar(response$content)
    content <- fromJSON(content)
    # content <- rbindlist(content, fill=T)
    content <- content %>% as.data.table()
    has_more <- nrow(content) > 0 && 'price' %in% colnames(content)
    if(has_more) {
      content <- content[, list(mint, price)]
      content <- unique(content)
      content$price <- as.numeric(content$price) / (10^9)  # lamports -> SOL
      has_more <- nrow(content) >= 40
      colnames(content)[1] <- 'tokenMint'
      df <- merge(content, mints, by=c('tokenMint')) %>% as.data.table()
      df <- df[, list(collection, token_id, price)]
      page <- page + 1
      solana_listings <- rbind(solana_listings, df)
    }
  }
}
head(solana_listings)
# head(terra_listings)
# new_listings <- rbind(solana_listings, terra_listings)
new_listings <- unique(solana_listings)

# listings <- read.csv('./data/listings.csv') %>% as.data.table()
rem <- unique(new_listings$collection)
sort(rem)
listings <- listings[ !(collection %in% eval(rem)), ]
listings <- listings[, list(collection, token_id, price)]
listings <- rbind(listings, new_listings)
listings <- listings[order(collection, price)]
listings[, token_id := as.integer(token_id)]
listings <- listings[ !(collection %in% c('LunaBulls','Galactic Punks','Galactic Angels','Levana Dragon Eggs')) ]

listings <- listings[!is.na(price)]
listings <- listings %>% as.data.table()

sort(unique(listings$collection))
# write.csv(unique(listings[, collection]), '~/Downloads/tmp.csv', row.names=F)
floors <- listings %>%
  group_by(collection) %>%
  summarize(cur_floor = min(price)) %>%
  as.data.table()
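# Equivalent data.table one-liner, shown for reference only; the dplyr pipe
# above is what the pipeline actually uses:
# floors <- listings[, .(cur_floor = min(price)), by = collection]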
get_fmp <- function(data, coefsdf, pred_price) {
  coefsdf[, tot := lin_coef + log_coef ]
  coefsdf[, lin_coef := lin_coef / tot]
  coefsdf[, log_coef := log_coef / tot]
  sum(coefsdf$log_coef) + sum(coefsdf$lin_coef)

  fmp <- merge( pred_price, coefsdf, by=c('collection') )
  fmp <- merge( fmp, data[, list(token_id, collection, block_timestamp, price, mn_20)], by=c('token_id','collection') )
  # fmp <- merge( fmp, floors, by=c('collection') )
  fmp[, abs_chg := (mn_20 - floor_price) * lin_coef ]
  fmp[, pct_chg := (mn_20 - floor_price) * log_coef ]
  fmp[, fair_market_price := pred_price + abs_chg + (pct_chg * pred_price / floor_price) ]
}
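# Illustrative only: get_fmp() is not called anywhere below. A call would look
# roughly like this sketch; copy() keeps the coefficient normalisation from
# mutating the shared coefsdf, and the column names are assumed from the merges
# inside the function.
if (FALSE) {
  fmp_from_sales <- get_fmp(sales, copy(coefsdf), pred_price)
  head(fmp_from_sales)
}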
if(FALSE) {
  coefsdf[, tot := lin_coef + log_coef ]
  coefsdf[, lin_coef := lin_coef / tot]
  coefsdf[, log_coef := log_coef / tot]
  sum(coefsdf$log_coef) + sum(coefsdf$lin_coef)

  fmp <- merge( pred_price, coefsdf, by=c('collection') )
  fmp <- merge( fmp, floors, by=c('collection') )
  fmp[, abs_chg := (cur_floor - floor_price) * lin_coef ]
  fmp[, pct_chg := (cur_floor - floor_price) * log_coef ]
  fmp[, fair_market_price := pred_price + abs_chg + (pct_chg * pred_price / floor_price) ]

  mn <- fmp %>% group_by(collection, cur_floor) %>% summarize(mn = min(fair_market_price)) %>% as.data.table()
  mn[, ratio := cur_floor / mn]
  fmp <- merge(fmp, mn[, list(collection, ratio)])
  fmp[ratio < 1, fair_market_price := fair_market_price * ratio ]

  fmp[, cur_sd := pred_sd * (cur_floor / floor_price) * SD_SCALE ]
  fmp[, price_low := qnorm(.2, fair_market_price, cur_sd) ]
  fmp[, price_high := qnorm(.8, fair_market_price, cur_sd) ]

  fmp[, price_low := pmax(price_low, cur_floor * 0.975) ]
  fmp[, price_high := pmax(price_high, cur_floor * 1.025) ]

  fmp[, price_low := round(price_low, 2) ]
  fmp[, price_high := round(price_high, 2) ]
  fmp[, fair_market_price := pmax(cur_floor, fair_market_price) ]
  fmp[, fair_market_price := round(fair_market_price, 2) ]
  fmp[, cur_sd := round(cur_sd, 2) ]
  head(fmp[collection == 'SOLGods'][order(fair_market_price)])
  head(fmp[(collection == 'SOLGods') & (rk <= 4654)][order(fair_market_price)])
  head(fmp[(collection == 'SOLGods') & (rk == 4654)][order(fair_market_price)])

  tmp <- merge(listings, fmp, by = c('collection','token_id')) %>% as.data.table()
  tmp[, deal_score := pnorm(price, fair_market_price, cur_sd) ]
  tmp[, deal_score := 100 * (1 - deal_score) ]
  tmp[, vs_floor := (price / cur_floor) - 1 ]
  tmp[, vs_floor_grp := ifelse(vs_floor < .1, '<10%', ifelse(vs_floor < .25, '<25%', '>25%')) ]
  tmp[, vs_floor := (price - cur_floor) ]
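  # Worked example of the deal_score mapping above (illustrative numbers only):
  #   100 * (1 - pnorm(90, 100, 10))   # ~84 -> listed a full sd below fair value
  #   100 * (1 - pnorm(110, 100, 10))  # ~16 -> listed a full sd above fair value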
  tmp <- tmp[ !(collection %in% c('Levana Dragon Eggs','Galactic Punks','LunaBulls','Galactic Angels','MAYC')) ]

  t2 <- tmp[order(-deal_score),.SD[2], list(vs_floor_grp, collection)] %>% as.data.table()
  t2 <- t2[, list(collection, vs_floor_grp, deal_score)][order(collection, vs_floor_grp)]
  t3 <- tmp[order(-deal_score),.SD[3], list(vs_floor_grp, collection)] %>% as.data.table()
  t3 <- t3[, list(collection, vs_floor_grp, deal_score)][order(collection, vs_floor_grp)]
  colnames(t2) <- c('collection','vs_floor_grp','deal_score_g2')
  colnames(t3) <- c('collection','vs_floor_grp','deal_score_g3')
  tmp <- merge(tmp, t2, by=c('collection','vs_floor_grp'))
  tmp <- merge(tmp, t3, by=c('collection','vs_floor_grp'))

  t2 <- tmp[order(-deal_score),.SD[2], list(collection)] %>% as.data.table()
  t2 <- t2[, list(collection, deal_score)][order(collection)]
  t3 <- tmp[order(-deal_score),.SD[3], list(collection)] %>% as.data.table()
  t3 <- t3[, list(collection, deal_score)][order(collection)]
  colnames(t2) <- c('collection','deal_score_2')
  colnames(t3) <- c('collection','deal_score_3')

  tmp <- merge(tmp, t2, by=c('collection'))
  tmp <- merge(tmp, t3, by=c('collection'))

  tmp[, pts := (deal_score * 5 - deal_score_g2 - deal_score_g3 - deal_score_2 - deal_score_3) * ((cur_floor / price)**0.75) + (100 * (1 - (( price - cur_floor ) / (fair_market_price - cur_floor)))) ]
  url <- 'https://discord.com/api/webhooks/976332557996150826/8KZqD0ov5OSj1w4PjjLWJtmgnCM9bPWaCkZUUEDMeC27Z0iqiA-ZU5U__rYU9tQI_ijA'
  unique(tmp$collection)
  for(col in c('price','pred_price','fair_market_price','vs_floor','deal_score','deal_score_2','deal_score_3','pts')) {
    if(!'price' %in% col) {
      tmp[, eval(col) := round(get(col)) ]
    } else {
      tmp[, eval(col) := ifelse(
        get(col) < 10
        , round(get(col), 2)
        , ifelse(
          get(col) < 100
          , round(get(col), 1)
          , round(get(col)))
        )
      ]
    }
  }
  tmp <- tmp[order(-pts)]
  head(tmp[, list(collection, token_id, price, nft_rank, rk, pred_price, cur_floor, fair_market_price, deal_score, deal_score_2, deal_score_3, pts)], 20)
  paste(head(tmp$label), collapse='\n')

  tmp[, l := nchar(collection)]
  mx <- max(tmp$l)
  # tmp$clean_collection <- str_pad(collection, eval(mx) - l, side = 'right', pad = '-') ]
  tmp$n_pad <- mx - tmp$l
  tmp$clean_collection <- str_pad(tmp$collection, mx - tmp$l, side = 'right', pad = '-')
  tmp[, clean_collection := str_pad(collection, eval(mx), pad='-', side='right')]
  tmp[, clean_collection := str_pad(collection, eval(mx), pad='-', side='both')]
  tmp$clean_collection <- str_pad(tmp$collection, mx, pad='-')
  tmp[, label := paste(clean_collection, str_pad(token_id, 4, side='left'), price, fair_market_price, deal_score, sep='\t')]
  tmp[, label := paste(
    clean_collection
    , str_pad(token_id, 4, side='left')
    , str_pad(rk, 4, side='left')
    , str_pad(price, 4, side='left')
    , str_pad(vs_floor, 5, side='left')
    , str_pad(fair_market_price, 4, side='left')
    , str_pad(deal_score, 2, side='left')
    , str_pad(deal_score_2, 2, side='left')
    , str_pad(deal_score_3, 2, side='left')
    , str_pad(pts, 3, side='left')
    , sep='\t')
  ]
  header <- paste(
    str_pad('collection', mx, side='both', pad='-')
    , str_pad('id', 4, side='left')
    , str_pad('rk', 4, side='left')
    , str_pad('$', 3, side='left')
    , str_pad('floor', 5, side='left')
    , str_pad('fmp', 3, side='left')
    , str_pad('ds', 2, side='left')
    , str_pad('ds2', 2, side='left')
    , str_pad('ds3', 2, side='left')
    , str_pad('pts', 3, side='left')
    , sep='\t')

  tmp <- tmp[order(-pts)]
  content <- tmp[ (price < 0.9 * fair_market_price) , head(.SD, 2), by = collection]
  content <- head(content[order(-pts)], 15)
  # content <- paste(c(header, content$label, collapse='\n'))

  content <- paste(c(header, content$label), collapse='\n')
  # content <- paste(c(header, head(tmp$label, 10)), collapse='\n')
  data <- list(
    content = paste0('```', content, '```')
  )
  res <- POST(url, body = data, encode = "form", verbose())
  # tmp <- tmp[order(-deal_score)]
  # head(tmp)
  # plot_data[, deal_score := round(100 * (1 - y))]
  # y <- pnorm(x, mu, sd)
  # tmp[, deal_score := ((fair_market_price / price) - 1) ]
  # tmp[, deal_score := ((fair_market_price / price) - 0) ]
  # tmp <- tmp[order(-deal_score)]
  # tmp <- tmp[, list(collection, token_id, fair_market_price, price, deal_score)]
  # tmp[, .SD[1:3], collection]

  # fmp <- fmp[, list(collection, token_id, nft_rank, rk, fair_market_price, price_low, price_high)]
  fmp <- fmp[, list(collection, token_id, nft_rank, rk, fair_market_price, cur_floor, cur_sd, lin_coef, log_coef)]
  colnames(fmp)[3] <- 'rarity_rank'
  colnames(fmp)[4] <- 'deal_score_rank'

  if (FALSE) {
    for( cur_collection in unique(fmp$collection)) {
      print(paste0('Working on ', cur_collection, '...'))
      data <- fmp[collection == eval(cur_collection)]
      KafkaGeneric(
        .topic = 'prod-data-science-uploads'
        , .url = 'https://kafka-rest-proxy.flipside.systems'
        , .project = paste0('nft-deal-score-rankings-', cur_collection)
        , .data = data
      )
    }
  }
}
# write the floor prices to snowflake
data <- floors
KafkaGeneric(
  .topic = 'prod-data-science-uploads'
  , .url = 'https://kafka-rest-proxy.flipside.systems'
  , .project = 'nft-deal-score-floors'
  , .data = data
)

sort(unique(listings$collection))

save(
  listings
  , file = listings_file
)
if(!isRstudio) {
  write.csv(listings, paste0(base_dir, 'nft_deal_score_listings.csv'))
}
```

Done updating at `r Sys.time()`

The end. Byeeeee.
655 viz/upload_solana_nft_labels.py Normal file
@ -0,0 +1,655 @@
import re
import os
import json
import time
# import math
import requests
import pandas as pd
# import urllib.request
import snowflake.connector
from time import sleep

# from solana_model import just_float
# from utils import clean_name, clean_token_id, format_num, merge

############################
#    Define Constants      #
############################
BASE_PATH = '/home/data-science'
DATA_FOLDER = '/rstudio-data/nft_labels'
RPC = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'


##############################
#    Load DB Connection      #
##############################
with open('{}/data_science/util/snowflake.pwd'.format(BASE_PATH), 'r') as f:
    pwd = f.readlines()[0].strip()
with open('{}/data_science/util/snowflake.usr'.format(BASE_PATH), 'r') as f:
    usr = f.readlines()[0].strip()

ctx = snowflake.connector.connect(
    user=usr,
    password=pwd,
    account='vna27887.us-east-1'
)


############################
#    Helper Functions      #
############################
def read_csv(data_folder, fname):
    return(pd.read_csv('{}/{}.csv'.format(data_folder, fname)))

def write_csv(data_folder, fname, df, verbose = True):
    df.to_csv('{}/{}.csv'.format(data_folder, fname), index=False)
    if verbose:
        print('Wrote {} rows to {}'.format(len(df), fname))

def clean_colnames(df):
    names = [ x.lower() for x in df.columns ]
    df.columns = names
    return(df)

def clean_collection_name(x):
    x = re.sub(r'\|', '-', x).strip()
    x = re.sub(r'\)', '', x).strip()
    x = re.sub(r'\(', '', x).strip()
    x = re.sub(r'\'', '', x).strip()
    return(x)

def merge(left, right, on=None, how='inner', ensure=True, verbose=True):
    df = left.merge(right, on=on, how=how)
    if len(df) != len(left) and (ensure or verbose):
        print('{} -> {}'.format(len(left), len(df)))
        cur = left.merge(right, on=on, how='left')
        cols = set(right.columns).difference(set(left.columns))
        print(cols)
        col = list(cols)[0]
        missing = cur[cur[col].isnull()]
        print(missing.head())
        if ensure:
            assert(False)
    return(df)

def Convert(tup, di):
    # NOTE: the di argument is ignored; the list of tuples is converted straight to a dict
    di = dict(tup)
    return di
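# Illustrative only (nothing below runs): a sketch of how the helpers above are
# used. 'howrare_labels' is written later in this file; the variable names here
# are hypothetical.
if False:
    labels = read_csv(DATA_FOLDER, 'howrare_labels')       # reads {DATA_FOLDER}/howrare_labels.csv
    labels = clean_colnames(labels)                         # lower-cases the column names
    write_csv(DATA_FOLDER, 'howrare_labels_copy', labels)   # logs "Wrote N rows to howrare_labels_copy"
    # merge() is a guarded inner join: when rows drop and ensure=True, it prints
    # the unmatched rows and asserts, so silent row loss is caught early.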
####################################
|
||||
# Metadata From HowRare.Is #
|
||||
####################################
|
||||
def how_rare_is_api():
|
||||
query = '''
|
||||
SELECT DISTINCT LOWER(project_name) AS lower_collection
|
||||
FROM solana.core.dim_nft_metadata
|
||||
'''
|
||||
df = ctx.cursor().execute(query)
|
||||
df = pd.DataFrame.from_records(iter(df), columns=[x[0] for x in df.description])
|
||||
|
||||
url = 'https://api.howrare.is/v0.1/collections'
|
||||
r = requests.get(url)
|
||||
j = r.json()
|
||||
c_df = pd.DataFrame(j['result']['data']).sort_values('floor_marketcap', ascending=0)
|
||||
c_df['lower_collection'] = c_df.url.apply(lambda x: x.lower().strip() )
|
||||
seen = sorted(df.LOWER_COLLECTION.apply(lambda x: re.sub(' |_|\'', '', x) ).values)
|
||||
c_df['seen_1'] = c_df.url.apply(lambda x: re.sub(' |_|\'', '', x[1:]).lower() in seen ).astype(int)
|
||||
c_df['seen_2'] = c_df.name.apply(lambda x: re.sub(' |_|\'', '', x).lower() in seen ).astype(int)
|
||||
c_df['seen'] = (c_df.seen_1 + c_df.seen_2 > 0).astype(int)
|
||||
seen = seen + [ 'smb','aurory','degenapes','thugbirdz','degods','okay_bears','catalinawhalemixer','cetsoncreck','stonedapecrew','solgods' ]
|
||||
c_df = c_df[-(c_df.url.isin([ '/'+x for x in seen]))]
|
||||
c_df = c_df[c_df.seen == 0]
|
||||
it = 0
|
||||
tot = len(c_df)
|
||||
m_data = []
|
||||
print('Pulling metadata for {} collections'.format(tot))
|
||||
for row in c_df.iterrows():
|
||||
it += 1
|
||||
row = row[1]
|
||||
collection = row['name']
|
||||
print('#{} / {}: {}'.format(it, tot, collection))
|
||||
url = row['url'][1:]
|
||||
if it > 1:
|
||||
assert(len(m_data))
|
||||
url = 'https://api.howrare.is/v0.1/collections/'+url
|
||||
r = requests.get(url)
|
||||
j = r.json()
|
||||
n_errors = 0
|
||||
for i in j['result']['data']['items']:
|
||||
try:
|
||||
token_id = int(i['id'])
|
||||
mint = i['mint']
|
||||
image = i['image']
|
||||
for d in i['attributes']:
|
||||
d['token_id'] = token_id
|
||||
d['collection'] = collection
|
||||
d['mint_address'] = mint
|
||||
d['image_url'] = image
|
||||
m_data += [ d ]
|
||||
except:
|
||||
# print('Error')
|
||||
n_errors += 1
|
||||
pass
|
||||
if n_errors:
|
||||
print('{} errors'.format(n_errors))
|
||||
metadata = pd.DataFrame(m_data).rename(columns={'name':'feature_name', 'value':'feature_value'})
|
||||
|
||||
write_csv(DATA_FOLDER, 'howrare_labels', metadata[['collection','mint_address']])
|
||||
|
||||
a = metadata.groupby(['collection','mint_address','token_id','image_url'])[[ 'feature_name','feature_value' ]].apply(lambda g: Convert(list(map(tuple, g.values.tolist())), {}) ).reset_index()
|
||||
a.columns = ['collection','mint_address','token_id','image_url', 'token_metadata']
|
||||
a['commission_rate'] = None
|
||||
a['contract_address'] = a.mint_address
|
||||
a['contract_name'] = a.collection
|
||||
a['created_at_block_id'] = 0
|
||||
a['created_at_timestamp'] = '2021-01-01'
|
||||
a['created_at_tx_id'] = ''
|
||||
a['creator_address'] = a.mint_address
|
||||
a['creator_name'] = a.collection
|
||||
a['project_name'] = a.collection
|
||||
a['token_metadata_uri'] = a.image_url
|
||||
a['token_name'] = a.collection
|
||||
a['n'] = range(len(a))
|
||||
a['n'] = a.n.apply(lambda x: int(x/50) )
|
||||
a['token_id'] = a.token_id.astype(int)
|
||||
|
||||
# remove existing files
|
||||
fnames = os.listdir(DATA_FOLDER+'/metadata/results/')
|
||||
print('fnames')
|
||||
print(fnames)
|
||||
for f in fnames:
|
||||
os.remove(DATA_FOLDER+'/metadata/results/'+f)
|
||||
|
||||
# write new metadata incrementally to upload to solana.core.dim_nft_metadata
|
||||
n = 100000
|
||||
tot = int(len(a) / n) + 1
|
||||
for i in range(0, len(a), n):
|
||||
ind = int(i/n)
|
||||
print('#{} / {}'.format(ind, tot))
|
||||
g = a.head(i+n).tail(n).to_dict('records')
|
||||
txt = [
|
||||
{
|
||||
"model": {
|
||||
"blockchain": "solana",
|
||||
"sinks": [
|
||||
{
|
||||
"destination": "{database_name}.silver.nft_metadata",
|
||||
"type": "snowflake",
|
||||
"unique_key": "blockchain || contract_address || token_id"
|
||||
}
|
||||
],
|
||||
},
|
||||
"results": g[x:x+50]
|
||||
}
|
||||
for x in range(0, len(g), 50)
|
||||
]
|
||||
w = pd.DataFrame({'ind': range(len(txt)), 'results':[json.dumps(x) for x in txt] })
|
||||
write_csv( DATA_FOLDER, 'metadata/results/{}'.format(ind), w )
|
||||
return
|
||||
|
||||
|
||||
#################################
|
||||
# Load Data From ME API #
|
||||
#################################
|
||||
def mints_from_me():
|
||||
##################################
|
||||
# Get All ME Collections #
|
||||
##################################
|
||||
headers = {
|
||||
# 'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
|
||||
}
|
||||
data = []
|
||||
has_more = 1
|
||||
offset = 0
|
||||
while has_more:
|
||||
sleep(1)
|
||||
print(offset)
|
||||
url = 'https://api-mainnet.magiceden.dev/v2/collections?offset={}&limit=500'.format(offset)
|
||||
r = requests.get(url)
|
||||
j = r.json()
|
||||
data = data + j
|
||||
has_more = len(j)
|
||||
offset += 500
|
||||
df = pd.DataFrame(data)
|
||||
write_csv(DATA_FOLDER, 'me_collections', df)
|
||||
# df.to_csv('{}/me_collections.csv'.format(DATA_FOLDER), index=False)
|
||||
df = read_csv(DATA_FOLDER, 'me_collections')
|
||||
# df = pd.read_csv('./data/me_collections.csv')
|
||||
|
||||
###########################################
|
||||
# Get 1 Mint From Each Collection #
|
||||
###########################################
|
||||
it = 0
|
||||
l_data = []
|
||||
# old_l_df = pd.read_csv('./data/me_mints.csv')
|
||||
old_l_df = read_csv(DATA_FOLDER, 'me_mints')
|
||||
seen = list(old_l_df.symbol.unique())
|
||||
print('We\'ve already seen {} / {} mints from ME'.format(len(seen), len(df)))
|
||||
df = df[ -df.symbol.isin(seen) ]
|
||||
df = df[ (df.symbol.notnull()) & (df.symbol != '') ]
|
||||
df = df.sort_values('symbol')
|
||||
tot = len(df)
|
||||
start = time.time()
|
||||
for row in df.iterrows():
|
||||
sleep(0.5)
|
||||
it += 1
|
||||
row = row[1]
|
||||
# print('Listings on {}...'.format(row['symbol']))
|
||||
url = 'https://api-mainnet.magiceden.dev/v2/collections/{}/activities?offset=0&limit=1'.format(row['symbol'])
|
||||
if row['symbol'] in seen:
|
||||
print('Seen')
|
||||
continue
|
||||
try:
|
||||
r = requests.get(url, headers=headers)
|
||||
j = r.json()
|
||||
except:
|
||||
try:
|
||||
print('Re-trying in 10s')
|
||||
sleep(10)
|
||||
r = requests.get(url, headers=headers)
|
||||
j = r.json()
|
||||
except:
|
||||
try:
|
||||
print('Re-trying in 60s')
|
||||
sleep(60)
|
||||
r = requests.get(url, headers=headers)
|
||||
j = r.json()
|
||||
except:
|
||||
print('Re-trying in 60s (again!)')
|
||||
sleep(60)
|
||||
r = requests.get(url, headers=headers)
|
||||
j = r.json()
|
||||
if len(j):
|
||||
l_data += [[ row['symbol'], row['name'], j[0]['tokenMint'] ]]
|
||||
if it == 1 or it % 10 == 0:
|
||||
print('#{} / {} ({} records in {} secs)'.format(it, tot, len(l_data), round(time.time() - start)))
|
||||
# l_df = pd.DataFrame(l_data, columns=['symbol','name','mint_address'])
|
||||
# l_df.to_csv('./data/me_mints.csv', index=False)
|
||||
l_df = pd.DataFrame(l_data, columns=['symbol','name','mint_address'])
|
||||
l_df = pd.concat([l_df, old_l_df]).drop_duplicates(subset=['symbol'])
|
||||
print('Adding {} rows to me_mints'.format(len(l_df) - len(old_l_df)))
|
||||
# l_df.to_csv('./data/me_mints.csv', index=False)
|
||||
write_csv(DATA_FOLDER, 'me_mints', l_df)
|
||||
|
||||
|
||||
######################################################
|
||||
# Get Update Authorities For All Collections #
|
||||
######################################################
|
||||
# l_df = pd.read_csv('./data/me_mints.csv')
|
||||
# m_old = pd.read_csv('./data/me_update_authorities.csv')
|
||||
m_old = read_csv(DATA_FOLDER, 'me_update_authorities')
|
||||
m_old['seen'] = 1
|
||||
m_data = list(m_old[['symbol','name','update_authority','seen']].values)
|
||||
seen = [ x[0] for x in m_data ]
|
||||
print('Seen {} m_data'.format(len(seen)))
|
||||
l_df = l_df[-l_df.symbol.isin(seen)]
|
||||
l_df = l_df.sort_values('symbol')
|
||||
it = 0
|
||||
for row in l_df.iterrows():
|
||||
sleep(.5)
|
||||
it += 1
|
||||
row = row[1]
|
||||
symbol = row['symbol']
|
||||
print('Working on {}...'.format(symbol))
|
||||
if symbol in seen:
|
||||
print('Seen')
|
||||
continue
|
||||
url = 'https://api-mainnet.magiceden.dev/v2/tokens/{}'.format(row['mint_address'])
|
||||
try:
|
||||
r = requests.get(url, headers=headers)
|
||||
j = r.json()
|
||||
except:
|
||||
print('Re-trying in 10s')
|
||||
sleep(10)
|
||||
try:
|
||||
r = requests.get(url, headers=headers)
|
||||
j = r.json()
|
||||
except:
|
||||
print('Re-trying in 60s')
|
||||
sleep(60)
|
||||
r = requests.get(url, headers=headers)
|
||||
j = r.json()
|
||||
if 'updateAuthority' in j.keys():
|
||||
m_data += [[ row['symbol'], row['name'], j['updateAuthority'], 0 ]]
|
||||
if it % 10 == 0:
|
||||
print('it#{}: {}'.format(it, len(m_data)))
|
||||
# m_df = pd.DataFrame(m_data, columns=['symbol','name','update_authority'])
|
||||
# m_df.to_csv('./data/me_update_authorities.csv', index=False)
|
||||
m_df = pd.DataFrame(m_data, columns=['symbol','name','update_authority','seen'])
|
||||
m_df = m_df.drop_duplicates()
|
||||
print('Adding {} rows to me_update_authorities'.format(len(m_df) - len(m_old)))
|
||||
write_csv(DATA_FOLDER, 'me_update_authorities', m_df)
|
||||
# m_df.to_csv('./data/me_update_authorities.csv', index=False)
|
||||
|
||||
def pull_from_metaboss():
|
||||
|
||||
######################################################
|
||||
# Get Update Authorities For All Collections #
|
||||
######################################################
|
||||
# m_df = pd.read_csv('./data/me_update_authorities.csv')
|
||||
m_df = read_csv(DATA_FOLDER, 'me_update_authorities')
|
||||
n_auth = m_df.groupby('update_authority').name.count().reset_index().rename(columns={'name':'n_auth'})
|
||||
m_df = m_df.merge(n_auth)
|
||||
l1 = len(m_df[ (m_df.seen == 0) & (m_df.n_auth == 1)])
|
||||
l2 = len(m_df[ (m_df.seen == 0) & (m_df.n_auth > 1)])
|
||||
print('{} with 1 update_authority; {} with 2+ update_authority'.format(l1, l2))
|
||||
|
||||
need = list(m_df[ (m_df.seen == 0) & (m_df.n_auth == 1) ].update_authority.unique())
|
||||
need = m_df[m_df.update_authority.isin(need)]
|
||||
# l_df = pd.read_csv('./data/me_mints.csv')
|
||||
l_df = read_csv(DATA_FOLDER, 'me_mints')
|
||||
fix = need.merge(l_df[[ 'name','mint_address' ]])
|
||||
need = fix.copy().rename(columns={'name':'collection'})
|
||||
# need = need.drop_duplicates(subset=['update_authority']).sort_values('collection').head(7).tail(1)
|
||||
need['collection'] = need.collection.apply(lambda x: clean_collection_name(x) )
|
||||
need = need.drop_duplicates(subset=['update_authority']).sort_values('collection')
|
||||
# need = need.head(2)
|
||||
|
||||
mfiles = ['/data/mints/{}/{}_mint_accounts.json'.format(re.sub(' |-', '_', collection), update_authority) for collection, update_authority in zip(need.collection.values, need.update_authority.values) ]
|
||||
seen = [ x for x in mfiles if os.path.exists(x) ]
|
||||
seen = []
|
||||
|
||||
# for update authorities that have only 1 collection, we can just check metaboss once
|
||||
mfolder = '{}/mints/'.format(DATA_FOLDER)
|
||||
it = 0
|
||||
tot = len(need)
|
||||
for row in need.iterrows():
|
||||
it += 1
|
||||
row = row[1]
|
||||
collection = row['collection']
|
||||
print('#{} / {}: {}'.format(it, tot, collection))
|
||||
# if collection in seen:
|
||||
# continue
|
||||
update_authority = row['update_authority']
|
||||
# print('Working on {}...'.format(collection))
|
||||
collection_dir = re.sub(' |-', '_', collection)
|
||||
|
||||
dir = '{}{}/'.format(mfolder, collection_dir)
|
||||
mfile = '{}{}_mint_accounts.json'.format(dir, update_authority)
|
||||
if not os.path.exists(dir):
|
||||
print(collection)
|
||||
os.makedirs(dir)
|
||||
# elif len(os.listdir(dir)) and os.path.exists(mfile):
|
||||
# print('Already have {}.'.format(collection))
|
||||
# print('Seen')
|
||||
# continue
|
||||
seen.append(update_authority)
|
||||
os.system('metaboss -r {} -T 300 snapshot mints --update-authority {} --output {}'.format(RPC, update_authority, dir))
|
||||
|
||||
# write the mints to csv
|
||||
data = []
|
||||
for path in os.listdir(mfolder):
|
||||
if os.path.isdir('{}{}'.format(mfolder, path)):
|
||||
collection = re.sub('_', ' ', path).strip()
|
||||
for fname in os.listdir(mfolder+path):
|
||||
f = mfolder+path+'/'+fname
|
||||
if os.path.isfile(f) and '.json' in f:
|
||||
with open(f) as file:
|
||||
j = json.load(file)
|
||||
for m in j:
|
||||
data += [[ collection, m ]]
|
||||
df = pd.DataFrame(data, columns=['collection','mint_address'])
|
||||
df.collection.unique()
|
||||
write_csv(DATA_FOLDER, 'single_update_auth_labels', df)
|
||||
# df.to_csv('./data/single_update_auth_labels.csv', index=False)
|
||||
|
||||
################################
|
||||
# Multiple Authorities #
|
||||
################################
|
||||
need = list(m_df[ (m_df.seen == 0) & (m_df.n_auth > 1) ].update_authority.unique())
|
||||
need = m_df[m_df.update_authority.isin(need)]
|
||||
fix = need.merge(l_df[[ 'name','mint_address' ]])
|
||||
need = fix.copy().rename(columns={'name':'collection'})
|
||||
need['collection'] = need.collection.apply(lambda x: clean_collection_name(x) )
|
||||
need = need.sort_values('collection').drop_duplicates(subset=['update_authority'], keep='first')
|
||||
# need = need.head(2)
|
||||
it = 0
|
||||
a = []
|
||||
for row in need.iterrows():
|
||||
it += 1
|
||||
print('#{}/{}'.format(it, len(need)))
|
||||
row = row[1]
|
||||
collection = row['collection']
|
||||
update_authority = row['update_authority']
|
||||
print('Working on {}...'.format(collection))
|
||||
collection_dir = re.sub(' |-', '_', collection)
|
||||
|
||||
dir = '{}{}/'.format(mfolder, collection_dir)
|
||||
mfile = '{}{}_mint_accounts.json'.format(dir, update_authority)
|
||||
if not os.path.exists(dir):
|
||||
print(collection)
|
||||
os.makedirs(dir)
|
||||
a.append(update_authority)
|
||||
os.system('metaboss -r {} -T 300 snapshot mints --update-authority {} --output {}'.format(RPC, update_authority, dir))
|
||||
|
||||
odir = dir+'output/'
|
||||
if not os.path.exists(odir):
|
||||
print('Making dir {}'.format(odir))
|
||||
os.makedirs(odir)
|
||||
os.system('metaboss -r {} -T 300 decode mint --list-file {} --output {}'.format(RPC, mfile, odir ))
|
||||
|
||||
##################################################
|
||||
# Load All The Mints for Each Collection #
|
||||
##################################################
|
||||
# now that we have the mints, create a data frame with the info for each mint in each collection
|
||||
mfolder = '{}/mints/'.format(DATA_FOLDER)
|
||||
data = []
|
||||
seen = [ x[1] for x in data ]
|
||||
it = 0
|
||||
dirs = sorted(os.listdir(mfolder))
|
||||
dirs = [ x for x in dirs if not x in ['3D_Sniping_Demons']]
|
||||
tot = len(dirs)
|
||||
for path in dirs:
|
||||
print('{} / {} ({} records)'.format(it, tot, len(data)))
|
||||
it += 1
|
||||
if os.path.isdir(mfolder+path):
|
||||
collection = re.sub('_', ' ', path).strip()
|
||||
print('Found {}'.format(collection))
|
||||
if not os.path.exists(mfolder+path+'/output/'):
|
||||
print('No output')
|
||||
continue
|
||||
fnames = os.listdir(mfolder+path+'/output/')
|
||||
print('{} files found'.format(len(fnames)))
|
||||
for fname in fnames:
|
||||
f = mfolder+path+'/output/'+fname
|
||||
if fname[:-5] in seen:
|
||||
continue
|
||||
if os.path.isfile(f) and '.json' in f:
|
||||
try:
|
||||
with open(f) as file:
|
||||
j = json.load(file)
|
||||
data += [[ collection, fname, j['name'], j['symbol'], j['uri'] ]]
|
||||
except:
|
||||
print('Error {}'.format(fname[:-5]))
|
||||
|
||||
##################################################
|
||||
# Load All The Mints for Each Collection #
|
||||
##################################################
|
||||
new_mints = pd.DataFrame(data, columns=['collection','mint_address','name','symbol','uri'])
|
||||
# tmp = tmp[-(tmp.collection.isin(['Dskullys','Decimusdynamics']))]
|
||||
n = len(new_mints[(new_mints.uri.isnull()) | (new_mints.uri == '')])
|
||||
tot = len(new_mints)
|
||||
pct = round(n * 100 / tot, 1)
|
||||
print('{} ({}%) rows have no uri'.format(n, pct))
|
||||
new_mints = new_mints[new_mints.uri != '']
|
||||
|
||||
# function to clean the name of each NFT (remove the number)
|
||||
def f_cn(x):
|
||||
if not x or x != x:
|
||||
return(x)
|
||||
if '#' in x[-6:]:
|
||||
x = ''.join(re.split('#', x)[:-1]).strip()
|
||||
elif bool(re.match('.+\s+[0-9]+', x)):
|
||||
x = ' '.join(re.split(' ', x)[:-1]).strip()
|
||||
return(x)
|
||||
new_mints['clean_name'] = new_mints.name.apply(lambda x: f_cn(x) )
|
||||
|
||||
# determine for each collection if we should look at collection-name-symbol, collection-symbol, or just collection to determine what collection it actuallly belongs to
|
||||
# this logic is because e.g. some only have a few names in the collection so we can iterate, but some have a different name for each NFT, so we assume its the same collection for all
|
||||
a = new_mints.drop_duplicates(subset=['collection','clean_name','symbol']).groupby(['collection']).uri.count().reset_index().sort_values('uri', ascending=0)
|
||||
symbol_only = a[a.uri > 10].collection.unique()
|
||||
b = new_mints[new_mints.collection.isin(symbol_only)].drop_duplicates(subset=['collection','symbol']).groupby(['collection']).uri.count().reset_index().sort_values('uri', ascending=0)
|
||||
collection_only = b[b.uri > 10].collection.unique()
|
||||
symbol_only = [x for x in symbol_only if not x in collection_only]
|
||||
|
||||
# now get the info for each collection-name-symbol combo
|
||||
g1 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].groupby(['collection','clean_name','symbol']).head(1).reset_index()
|
||||
g2 = new_mints[ ((new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].groupby(['collection','symbol']).head(1).reset_index()
|
||||
g3 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & ((new_mints.collection.isin(collection_only))) ].groupby(['collection']).head(1).reset_index()
|
||||
g = pd.concat([g1, g2, g3]).drop_duplicates(subset=['mint_address'])
|
||||
print('{} Total: {} all, {} collection-symbol {} collection'.format(len(g), len(g1), len(g2), len(g3)))
|
||||
# g.to_csv('~/Downloads/tmp-g.csv', index=False)
|
||||
|
||||
# iterate over each row to get what collection they are actually in
|
||||
# by pulling data from the uri
|
||||
uri_data = []
|
||||
it = 0
|
||||
tot = len(g)
|
||||
print(tot)
|
||||
errs = []
|
||||
seen = [ x['uri'] for x in uri_data ]
|
||||
# for row in g[ -(g.uri.isin(seen)) ].iterrows():
|
||||
for row in g.iterrows():
|
||||
row = row[1]
|
||||
it += 1
|
||||
# if it % 100 == 0:
|
||||
# uri_df = pd.DataFrame(uri_data)[[ 'collection','name','symbol','row_symbol','row_collection','uri','row_clean_name','mint_address' ]]
|
||||
# uri_df.to_csv('~/Downloads/uri_df.csv', index=False)
|
||||
print('#{} / {}: {}'.format(it, tot, row['collection']))
|
||||
try:
|
||||
r = requests.get(row['uri'])
|
||||
j = r.json()
|
||||
j['uri'] = row['uri']
|
||||
j['row_collection'] = row['collection']
|
||||
j['row_clean_name'] = row['clean_name']
|
||||
j['row_symbol'] = row['symbol']
|
||||
j['mint_address'] = row['mint_address']
|
||||
uri_data += [j]
|
||||
except:
|
||||
print('Error')
|
||||
errs.append(row)
|
||||
uri_df = pd.DataFrame(uri_data)[[ 'collection','name','symbol','row_symbol','row_collection','uri','row_clean_name','mint_address' ]]
|
||||
write_csv(DATA_FOLDER, 'uri_df', uri_df)
|
||||
# uri_df.to_csv('~/Downloads/uri_df.csv', index=False)
|
||||
|
||||
# for each row, parse the json from the uri
|
||||
# uri_df = pd.read_csv('~/Downloads/uri_df.csv')
|
||||
# read_csv(DATA_FOLDER, 'uri_df')
|
||||
def f(x, c):
|
||||
x = str(x)
|
||||
try:
|
||||
n = json.loads(re.sub("'", "\"", x))[c]
|
||||
if type(n) == list:
|
||||
return(n[0])
|
||||
return(n)
|
||||
except:
|
||||
try:
|
||||
return(json.loads(re.sub("'", "\"", x))[c])
|
||||
except:
|
||||
try:
|
||||
return(json.loads(re.sub("'", "\"", x))[0][c])
|
||||
except:
|
||||
try:
|
||||
return(json.loads(re.sub("'", "\"", x))[0])
|
||||
except:
|
||||
return(x)
|
||||
# parse the json more
|
||||
uri_df['parsed_collection'] = uri_df.collection.apply(lambda x: f(x, 'name') )
|
||||
uri_df['parsed_family'] = uri_df.collection.apply(lambda x: f(x, 'family') )
|
||||
uri_df['clean_name'] = uri_df.name.apply( lambda x: f_cn(x) )
|
||||
# calculate what the collection name is
|
||||
uri_df['use_collection'] = uri_df.parsed_collection.replace('', None).fillna( uri_df.clean_name )#.fillna( uri_df.row_symbol )
|
||||
# uri_df[uri_df.use_collection == 'nan'][['use_collection','parsed_collection','parsed_family','clean_name','name','collection','symbol','row_symbol','row_collection']].head()
|
||||
# uri_df[uri_df.use_collection == 'nan'][['use_collection','parsed_collection','parsed_family','clean_name','name','collection','symbol','row_symbol','row_collection']].to_csv('~/Downloads/tmp.csv', index=False)
|
||||
len(uri_df)
|
||||
|
||||
# clean the collection name
|
||||
def f1(x):
|
||||
try:
|
||||
if len(x['use_collection']) == 1:
|
||||
return(x['clean_name'])
|
||||
if bool(re.match('.+\s+#[0-9]+', x['use_collection'])):
|
||||
return(''.join(re.split('#', x['use_collection'])[:-1]).strip())
|
||||
if '{' in x['use_collection']:
|
||||
return(x['clean_name'])
|
||||
return(x['use_collection'].strip().title())
|
||||
except:
|
||||
return(x['use_collection'].strip().title())
|
||||
uri_df['tmp'] = uri_df.apply(lambda x: f1(x), 1 )
|
||||
uri_df['use_collection'] = uri_df.apply(lambda x: f1(x), 1 )
|
||||
|
||||
# clean the mint_address
|
||||
uri_df['mint_address'] = uri_df.mint_address.apply(lambda x: re.sub('.json','', x))
|
||||
uri_df = uri_df.fillna('None')
|
||||
|
||||
for i in range(2):
|
||||
# for each collection-name-symbol combo, see how many have multiple mappings
|
||||
a = uri_df.copy().fillna('None')
|
||||
a = a[['row_collection','row_clean_name','row_symbol','use_collection']].drop_duplicates().groupby(['row_collection','row_clean_name','row_symbol']).use_collection.count().reset_index().rename(columns={'use_collection':'n_1'})
|
||||
uri_df = merge(uri_df, a, ensure=True)
|
||||
|
||||
# for each collection-symbol combo, see how many have multiple mappings
|
||||
a = uri_df.copy().fillna('None')
|
||||
a = a[['row_collection','row_symbol','use_collection']].drop_duplicates().groupby(['row_collection','row_symbol']).use_collection.count().reset_index().rename(columns={'use_collection':'n_2'})
|
||||
uri_df = merge(uri_df, a, ensure=True)
|
||||
|
||||
# for each collection combo, see how many have multiple mappings
|
||||
a = uri_df.copy().fillna('None')
|
||||
a = a[['row_collection','use_collection']].drop_duplicates().groupby(['row_collection']).use_collection.count().reset_index().rename(columns={'use_collection':'n_3'})
|
||||
uri_df = merge(uri_df, a, ensure=True)
|
||||
|
||||
uri_df['n'] = uri_df.apply(lambda x: x['n_3'] if x['row_collection'] in collection_only else x['n_2'] if x['row_collection'] in symbol_only else x['n_1'], 1 )
|
||||
print('{} / {} ({}%) have multiple collection-name-symbol mappings'.format(len(uri_df[uri_df.n > 1]), len(uri_df), round( 100.0 * len(uri_df[uri_df.n > 1]) / len(uri_df))))
|
||||
|
||||
# if there is multiple, use the parsed_family instead of the use_collection
|
||||
uri_df['use_collection'] = uri_df.apply(lambda x: x['use_collection'] if x['n'] == 1 else x['parsed_family'], 1 )
|
||||
del uri_df['n_1']
|
||||
del uri_df['n_2']
|
||||
del uri_df['n_3']
|
||||
|
||||
# only take rows where there is a single mapping
|
||||
m = uri_df[uri_df.n==1][[ 'use_collection','row_collection','row_clean_name','row_symbol' ]].dropna().drop_duplicates()
|
||||
m.columns = [ 'use_collection','collection','clean_name','symbol' ]
|
||||
|
||||
m_1 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].fillna('').merge(m.fillna(''), how='left')
|
||||
m_2 = new_mints[ ((new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ][[ 'collection','mint_address','symbol' ]].fillna('').merge(m.fillna(''), how='left')
|
||||
m_3 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & ((new_mints.collection.isin(collection_only))) ][[ 'collection','mint_address' ]].fillna('').merge(m.fillna(''), how='left')
|
||||
len(m_1) + len(m_2) + len(m_3)
|
||||
len(new_mints)
|
||||
# m = new_mints.fillna('').merge(m.fillna(''), how='left')
|
||||
m = pd.concat( [m_1, m_2, m_3] )
|
||||
print('After all this, we have {}% of the mints'.format( round(len(m) * 100 / len(new_mints)) ))
|
||||
len(new_mints)
|
||||
len(m)
|
||||
m['mint_address'] = m.mint_address.apply(lambda x: re.sub('.json', '', x) )
|
||||
m = m[['mint_address','use_collection']].dropna().drop_duplicates()
|
||||
m.columns = ['mint_address','collection']
|
||||
|
||||
m[m.collection.isnull()].head()
|
||||
m[m.collection=='Nan'].head()
|
||||
|
||||
m = m[m.collection != 'Nan']
|
||||
|
||||
tmp = m.groupby('collection').mint_address.count().reset_index().sort_values('mint_address', ascending=0)
|
||||
tmp.head()
|
||||
|
||||
# m.to_csv('./data/mult_update_auth_labels.csv', index=False)
|
||||
write_csv(DATA_FOLDER, 'mult_update_auth_labels', m)
|
||||
|
||||
def compile():
|
||||
single_update_auth_labels = read_csv(DATA_FOLDER, 'single_update_auth_labels')
|
||||
mult_update_auth_labels = read_csv(DATA_FOLDER, 'mult_update_auth_labels')
|
||||
howrare_labels = read_csv(DATA_FOLDER, 'howrare_labels')
|
||||
df = pd.concat([howrare_labels, single_update_auth_labels, mult_update_auth_labels])
|
||||
df = df[ (df.collection != 'Nan') & (df.collection != 'nan') & (df.collection.notnull()) ]
|
||||
df = df[ (df.mint_address != 'Nan') & (df.mint_address != 'nan') & (df.mint_address.notnull()) ]
|
||||
df = df.drop_duplicates(subset=['mint_address'], keep='first')
|
||||
write_csv(DATA_FOLDER, 'solana_nft_labels', df[['mint_address','collection']])
|
||||
|
||||
# print('Loaded!')
|
||||
# mints_from_me()
|
||||
# pull_from_metaboss()
|
||||
# compile()
|
||||
# how_rare_is_api()
|
||||
@ -16,9 +16,12 @@ clean_names = {
    ,'boredapeyachtclub': 'BAYC'
    ,'mutantapeyachtclub': 'MAYC'
    ,'bayc': 'BAYC'
    ,'bakc': 'BAKC'
    ,'mayc': 'MAYC'
    ,'solgods': 'SOLGods'
    ,'meerkatmillionairescc': 'Meerkat Millionaires'
    ,'ggsg:galacticgeckos': 'Galactic Geckos'
    ,'solstein': 'SolStein'
    # ,'stonedapecrew': 'Stoned Ape Crew'
}

@ -47,6 +50,7 @@ def clean_name(name):
    name = re.sub('-', ' ', name)
    name = re.sub(' On ', ' on ', name)
    name = re.sub('Defi ', 'DeFi ', name)
    # name = re.sub(r'[^a-zA-Z0-9\s]', '', name)
    return(name)