# nft-deal-score/load_data.py

import re
import os
import json
import time
import math
import requests
import pandas as pd
import urllib.request
import snowflake.connector
from bs4 import BeautifulSoup
from time import sleep
import cloudscraper
from theblockchainapi import SolanaAPIResource, SolanaNetwork, SearchMethod
# Get an API key pair for free here: https://dashboard.blockchainapi.com/api-keys
MY_API_KEY_ID = 'sLbjx8YFYdTtUuH'
MY_API_SECRET_KEY = 'p24pFaM9lLbWscN'
BLOCKCHAIN_API_RESOURCE = SolanaAPIResource(
api_key_id=MY_API_KEY_ID,
api_secret_key=MY_API_SECRET_KEY
)
os.chdir('/Users/kellenblumberg/git/nft-deal-score')
from solana_model import just_float
from utils import clean_name, clean_token_id, format_num, merge
#########################
# Connect to DB #
#########################
with open('snowflake.pwd', 'r') as f:
pwd = f.readlines()[0].strip()
with open('snowflake.usr', 'r') as f:
usr = f.readlines()[0].strip()
ctx = snowflake.connector.connect(
user=usr,
password=pwd,
account='vna27887.us-east-1'
)
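# Every query below follows the same three-step pattern: execute through the Snowflake cursor,
# wrap the rows into a DataFrame using the cursor description for column names, then lower-case
# the columns. A minimal sketch of that pattern as a helper; `run_query` is a hypothetical name
# (it is not used elsewhere in this file) and it relies on the clean_colnames helper defined below.
def run_query(query):
    cur = ctx.cursor().execute(query)
    df = pd.DataFrame.from_records(iter(cur), columns=[x[0] for x in cur.description])
    return clean_colnames(df)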
# query = '''
# SHOW TABLES
# '''
# sales = ctx.cursor().execute(query)
# sales = pd.DataFrame.from_records(iter(sales), columns=[x[0] for x in sales.description])
# sales = clean_colnames(sales)
# sorted(sales.name.unique())
# sorted(sales.schema_name.unique())
# sorted(sales.database_name.unique())
# sales[sales.name == 'ACTIVE_VAULT_EVENTS'][['name','schema_name']]
# tables = pd.DataFrame()
# df = sales[sales.schema_name.isin(['BRONZE_MIDGARD_2_6_9','BRONZE_MIDGARD_20211108_MIDGARD']) ]
# for row in df.iterrows():
# row = row[1]
# query = 'DESCRIBE TABLE {}.{}'.format(row['schema_name'], row['name'])
# table = ctx.cursor().execute(query)
# table = pd.DataFrame.from_records(iter(table), columns=[x[0] for x in table.description])
# table = clean_colnames(table)
# table['schema_name'] = row['schema_name']
# table['table_name'] = row['name']
# table.head()
# tables = tables.append(table)
# tables['clean_table_name'] = tables.table_name.apply(lambda x: re.sub('MIDGARD_', '', x) )
# a = tables[tables.schema_name == 'BRONZE_MIDGARD_20211108_MIDGARD'][['name','clean_table_name','type']]
# b = tables[tables.schema_name == 'BRONZE_MIDGARD_2_6_9'][['name','clean_table_name','type']]
# c = a.merge(b, on=['clean_table_name','name'], how='outer')
# c['is_hevo_fivetran'] = c.name.apply(lambda x: int(x[:7] == '__HEVO_' or x[:10] == '_FIVETRAN_') )
# c['in_old'] = (c.type_x.notnull()).astype(int)
# c['in_new'] = (c.type_y.notnull()).astype(int)
# c['in_both'] = ((c.in_old + c.in_new) == 2).astype(int)
# c.to_csv('~/Downloads/tmp.csv', index=False)
d_market = {
'Galactic Punks': 'terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k',
'LunaBulls': 'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2',
'Levana Dragon Eggs': 'terra1k0y373yxqne22pc9g7jvnr4qclpsxtafevtrpg',
'Levana Dust': 'terra1p70x7jkqhf37qa7qm4v23g4u4g8ka4ktxudxa7',
'Levana Meteors': 'terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v',
'Galactic Angels': 'terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v',
}
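# Terra NFT contract addresses keyed by collection name; used by add_terra_tokens below to build
# randomearth.io market URLs. Note that 'Levana Meteors' and 'Galactic Angels' point at the same
# address above. A hedged sketch of the URL construction mirroring the inline lambda in
# add_terra_tokens; `terra_market_url` is a hypothetical helper, not called elsewhere in this file.
def terra_market_url(collection, token_id):
    return 'https://randomearth.io/items/{}_{}'.format(d_market[collection], token_id)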
###################################
# Define Helper Functions #
###################################
def clean_colnames(df):
names = [ x.lower() for x in df.columns ]
df.columns = names
return(df)
def add_collection_steps():
# 1. mint_address_token_id_map
# 2. scrape metadata
metadata = pd.read_csv('./data/metadata.csv')
metadata['collection'] = metadata.collection.apply(lambda x: clean_name(x) )
sorted(metadata.collection.unique())
metadata.to_csv('./data/metadata.csv', index=False)
metadata[metadata.collection == 'Stoned Ape Crew']
metadata[metadata.collection == 'Stoned Ape Crew'].feature_name.unique()
# 3. scrape howrareis
# 4. add sales
# 5. run model
pass
def create_upload_file():
cols = [ 'collection','mint_address' ]
a = pd.read_csv('./data/mints-2022-06-13-2pm.csv')[cols]
b = pd.read_csv('~/Downloads/manual_labels.csv')
b.columns = cols
c = pd.read_csv('~/Downloads/solscan_collections.csv')[cols]
d = pd.read_csv('./data/tokens.csv')[cols]
df = pd.concat([a, b, c, d]).drop_duplicates(subset=['mint_address'], keep='last')
df.to_csv('~/Downloads/mints-2022-06-13-5pm.csv', index=False)
tmp = pd.read_csv('~/Downloads/mints-2022-06-13-5pm.csv')
tmp[tmp.mint_address == 'EhuVN896QVypRreAt6mcJr6eKkKunVzsgSRz7qt4oeBr']
def manual_clean():
for c in [ 'pred_price', 'attributes', 'feature_values', 'model_sales', 'listings', 'coefsdf', 'tokens' ]:
df = pd.read_csv('./data/{}.csv'.format(c))
df['chain'] = 'Solana'
if c == 'tokens':
df['clean_token_id'] = df.token_id
df.to_csv('./data/{}.csv'.format(c), index=False)
def pull_from_solscan():
todo = [
['50a75e6d3d0b6d4a72b2f745fdba4b1c28bc774ca9629fe8e36053ae2fb396f8','Degen Egg']
, ['45e3f45d695e9e8775eed480cb0f5a6a957d47dcb3ed3800e454846dca9ab7fc','Genopets']
, ['a437071c6f9679e8431a072ae39421262bf289cc6ead21e38190d5b7b409e7f7','Shin Sengoku']
, ['d38349f2704e8cd1c538cc48fbea4b3e2596ac8da14b62c0eb3c07aeda7ae75e','SolStein']
, ['9e0593a4842ceb9ccdc510e6ffdf0d84f736bff2b58d5803c5002ace17df9fe0','Zillaz NFT']
, ['895d8f01108fbb6b28c5e32027c9c98e3054241927c8e59c304fa4763c5c88ea','enviroPass Tier 02']
, ['59c2a35d902f85feec4c774df503a0df2be263f763dcbcb73bce50c999fc2c78','The Fracture']
, ['e8dfb059b1dfc71cf97342a1c46793bc5e154909416a93a155929da5bba44a57','Suteki']
, ['271e0d68d069d80afbcb916e877831b060933b97e7b02e1cfb77e74b228b4745','Chillchat']
]
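# for each (collectionId, collection) pair, page through the Solscan collection endpoint
# 500 mints at a time until it returns an empty page, collecting mint addresses and metadata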
start = time.time()
data = []
meta = []
it = 0
tot = len(todo)
for collectionId, collection in todo:
it += 1
print('#{} / {}'.format(it, tot))
# collectionId = j['data']['collectionId']
# collection = j['data']['collection']
offset = 0
limit = 500
while True:
print(offset)
url = 'https://api.solscan.io/collection/nft?sortBy=nameDec&collectionId={}&offset={}&limit={}'.format(collectionId, offset, limit)
r = requests.get(url)
js = r.json()['data']
offset += limit
if len(js) == 0:
break
for j in js:
data += [[ collectionId, collection, j['info']['mint'] ]]
m = j['info']['meta']
m['mint_address'] = j['info']['mint']
# m['name'] = row['name']
# m['update_authority'] = update_authority
meta += [ m ]
end = time.time()
print('Finished {} / {} in {} minutes'.format(it, tot, round((end - start) / 60.0, 1)))
df = pd.DataFrame(data, columns=['collection_id','collection','mint_address'])
df.to_csv('~/Downloads/solscan_collections.csv', index=False)
df[['collection','mint_address']].to_csv('~/Downloads/mints-2022-06-14-8am.csv', index=False)
df.groupby('collection').mint_address.count()
def collections_from_missing_tokens():
query = '''
WITH base AS (
SELECT block_timestamp::date AS date
, s.*
, ROW_NUMBER() OVER (ORDER BY sales_amount DESC) AS rn
FROM solana.fact_nft_sales s
LEFT JOIN solana.dim_labels l on s.mint = l.address
WHERE marketplace in ('magic eden v1', 'magic eden v2')
AND block_timestamp >= '2022-01-01'
AND l.address IS NULL
AND sales_amount >= 10
)
SELECT *
FROM base
WHERE rn % 20 = 0
ORDER BY sales_amount DESC
LIMIT 500
'''
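# sample every 20th of the largest unlabeled Magic Eden sales (>= 10 SOL), capped at 500 rows,
# so we only need to look up a representative set of missing mints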
missing = ctx.cursor().execute(query)
missing = pd.DataFrame.from_records(iter(missing), columns=[x[0] for x in missing.description])
missing = clean_colnames(missing)
missing.head()
headers = {
'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
}
it = 0
tot = len(missing)
data = []
for m in missing.mint.unique():
it += 1
if it % 10 == 0:
print('#{} / {} ({})'.format(it, tot, len(data)))
url = 'https://api-mainnet.magiceden.dev/v2/tokens/{}'.format(m)
r = requests.get(url, headers=headers)
j = r.json()
data.append(j)
pass
df = pd.DataFrame(data)
df.head()[['collection','mintAddress']]
df.to_csv('~/Downloads/tmp.csv', index=False)
need = df.groupby(['collection','updateAuthority']).mintAddress.count().reset_index().sort_values('mintAddress', ascending=0)
need = need[need.mintAddress > 1].rename(columns={'updateAuthority':'update_authority'})
need.to_csv('~/Downloads/missing.csv', index=False)
need.head()
sorted(need.collection.unique())
need['collection'] = need.collection.apply(lambda x: re.sub('_', ' ', x.title()).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub(r'\|', '-', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub(r'\)', '', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub(r'\(', '', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub(r"'", '', x).strip() )
us = sorted(need.update_authority.unique())
tot = len(us)
it = 0
for u in us:
it += 1
print('#{} / {} ({})'.format(it, tot, len(data)))
nfts = BLOCKCHAIN_API_RESOURCE.search_nfts(
update_authority = u
, update_authority_search_method = SearchMethod.EXACT_MATCH
)
print(u, len(nfts))
for n in nfts:
m = n['nft_metadata']
data += [[ m['update_authority'], m['mint'], m['data']['symbol'], m['data']['name'] ]]
def manual_tags():
d = {
'daaLrDfvcT4joui5axwR2gCkGAroruJFzyVsacU926g': 'Degenerate Ape Kindergarten'
, 'FbfGrZ3LKuGSsayK57DetzzyN7qKeNnDuLMu5bBSocwF': 'Botheads'
}
a = 'FbfGrZ3LKuGSsayK57DetzzyN7qKeNnDuLMu5bBSocwF'
c = 'Botheads'
labels = pd.DataFrame()
for a, c in d.items():
query = '''
SELECT DISTINCT instructions[1]:parsed:info:mint::string AS mint_address
FROM solana.fact_transactions
WHERE instructions[1]:parsed:info:mintAuthority = '{}'
'''.format(a)
df = ctx.cursor().execute(query)
df = pd.DataFrame.from_records(iter(df), columns=[x[0] for x in df.description])
df = clean_colnames(df)
df['collection'] = c
labels = labels.append(df)
labels.to_csv('~/Downloads/manual_labels.csv', index=False)
def mints_from_me():
##################################
# Get All ME Collections #
##################################
headers = {
'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
}
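# page through the Magic Eden /v2/collections endpoint 500 at a time; stop when a page comes back empty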
data = []
has_more = 1
offset = 0
while has_more:
sleep(1)
print(offset)
url = 'https://api-mainnet.magiceden.dev/v2/collections?offset={}&limit=500'.format(offset)
r = requests.get(url)
j = r.json()
data = data + j
has_more = len(j)
offset += 500
df = pd.DataFrame(data)
df.to_csv('./data/me_collections.csv', index=False)
df = pd.read_csv('./data/me_collections.csv')
# lp_data = []
# has_more = 1
# offset = 0
# while has_more:
# sleep(1)
# print(offset)
# url = 'https://api-mainnet.magiceden.dev/v2/launchpad/collections?offset={}&limit=500'.format(offset)
# r = requests.get(url)
# j = r.json()
# lp_data = lp_data + j
# has_more = len(j)
# offset += 500
# lp_df = pd.DataFrame(lp_data)
# lp_df.to_csv('./data/me_lp_collections.csv', index=False)
# lp_df = pd.read_csv('./data/me_lp_collections.csv')
###########################################
# Get 1 Mint From Each Collection #
###########################################
it = 0
l_data = []
old_l_df = pd.read_csv('./data/me_mints.csv')
seen = list(old_l_df.symbol.unique())
df = df[ -df.symbol.isin(seen) ]
df = df.sort_values('symbol')
for row in df.iterrows():
it += 1
row = row[1]
print('Listings on {}...'.format(row['symbol']))
url = 'https://api-mainnet.magiceden.dev/v2/collections/{}/activities?offset=0&limit=1'.format(row['symbol'])
if row['symbol'] in seen:
print('Seen')
continue
try:
r = requests.get(url, headers=headers)
j = r.json()
except:
print('Re-trying in 10s')
sleep(10)
try:
r = requests.get(url, headers=headers)
j = r.json()
except:
print('Re-trying in 60s')
sleep(60)
r = requests.get(url, headers=headers)
j = r.json()
if len(j):
l_data += [[ row['symbol'], row['name'], j[0]['tokenMint'] ]]
if it % 10 == 0:
print('it#{}: {}'.format(it, len(l_data)))
l_df = pd.DataFrame(l_data, columns=['symbol','name','mint_address'])
l_df.to_csv('./data/me_mints.csv', index=False)
l_df = pd.DataFrame(l_data, columns=['symbol','name','mint_address'])
l_df = l_df.append(old_l_df).drop_duplicates(subset=['symbol'])
print('Adding {} rows to me_mints'.format(len(l_df) - len(old_l_df)))
l_df.to_csv('./data/me_mints.csv', index=False)
# it = 0
# l_data = []
# seen = [ x[0] for x in l_data ]
# print(len(seen))
# for row in df.iterrows():
# it += 1
# row = row[1]
# print('Listings on {}...'.format(row['symbol']))
# url = 'https://api-mainnet.magiceden.dev/v2/collections/{}/listings?offset=0&limit=1'.format(row['symbol'])
# if row['symbol'] in seen:
# print('Seen')
# continue
# try:
# r = requests.get(url)
# j = r.json()
# except:
# print('Re-trying in 10s')
# sleep(10)
# try:
# r = requests.get(url)
# j = r.json()
# except:
# print('Re-trying in 60s')
# sleep(60)
# r = requests.get(url)
# j = r.json()
# if len(j):
# l_data += [[ row['symbol'], row['name'], j[0]['tokenMint'] ]]
# if it % 10 == 0:
# print('it#{}: {}'.format(it, len(l_data)))
# l_df = pd.DataFrame(l_data, columns=['symbol','name','mint_address'])
# l_df.to_csv('./data/me_mints.csv', index=False)
# l_df = pd.DataFrame(l_data, columns=['symbol','name','mint_address'])
# l_df.to_csv('./data/me_mints.csv', index=False)
# get missing collections
query = '''
WITH base AS (
SELECT block_timestamp::date AS date
, s.*
, ROW_NUMBER() OVER (ORDER BY sales_amount DESC) AS rn
FROM solana.fact_nft_sales s
LEFT JOIN solana.dim_labels l on s.mint = l.address
WHERE marketplace in ('magic eden v1', 'magic eden v2')
AND block_timestamp >= '2022-01-01'
AND block_timestamp <= '2022-05-20'
AND l.address IS NULL
AND sales_amount > 20
)
SELECT *
FROM base
WHERE rn % 50 = 1
LIMIT 100
'''
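# same sampling idea as above: every 50th unlabeled sale over 20 SOL, capped at 100 rows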
missing = ctx.cursor().execute(query)
missing = pd.DataFrame.from_records(iter(missing), columns=[x[0] for x in missing.description])
missing = clean_colnames(missing)
######################################################
# Get Update Authorities For All Collections #
######################################################
l_df = pd.read_csv('./data/me_mints.csv')
len(l_df)
l_df.head()
m_old = pd.read_csv('./data/me_update_authorities.csv')
m_old['seen'] = 1
m_data = list(m_old[['symbol','name','update_authority']].values)
seen = [ x[0] for x in m_data ]
print('Seen {} m_data'.format(len(seen)))
l_df = l_df[-l_df.symbol.isin(seen)]
l_df = l_df.sort_values('symbol')
it = 0
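# for each collection's sample mint, hit the ME tokens endpoint and record its updateAuthority,
# retrying with a longer sleep when the request or the JSON parse fails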
for row in l_df.iterrows():
sleep(.5)
it += 1
row = row[1]
symbol = row['symbol']
print('Working on {}...'.format(symbol))
if symbol in seen:
print('Seen')
continue
url = 'https://api-mainnet.magiceden.dev/v2/tokens/{}'.format(row['mint_address'])
try:
r = requests.get(url, headers=headers)
j = r.json()
except:
print('Re-trying in 10s')
sleep(10)
try:
r = requests.get(url, headers=headers)
j = r.json()
except:
print('Re-trying in 60s')
sleep(60)
r = requests.get(url, headers=headers)
j = r.json()
if 'updateAuthority' in j.keys():
m_data += [[ row['symbol'], row['name'], j['updateAuthority'] ]]
if it % 10 == 0:
print('it#{}: {}'.format(it, len(m_data)))
m_df = pd.DataFrame(m_data, columns=['symbol','name','update_authority'])
m_df.to_csv('./data/me_update_authorities.csv', index=False)
m_df = pd.DataFrame(m_data, columns=['symbol','name','update_authority'])
m_df = m_df.drop_duplicates()
print('Adding {} rows to me_update_authorities'.format(len(m_df) - len(m_old)))
m_df.to_csv('./data/me_update_authorities.csv', index=False)
m_df.tail(134).head(20)
m_df = m_df.tail(134)
query = '''
SELECT DISTINCT project_name, LOWER(project_name) AS lower_name
FROM crosschain.address_labels
WHERE blockchain = 'solana'
AND label_subtype = 'nf_token_contract'
AND project_name IS NOT NULL
'''
labels = ctx.cursor().execute(query)
labels = pd.DataFrame.from_records(iter(labels), columns=[x[0] for x in labels.description])
labels = clean_colnames(labels)
labels.to_csv('~/Downloads/tmp-la.csv', index=False)
######################################################
#   Compare ME Collections To Existing DB Labels     #
######################################################
m_df = pd.read_csv('./data/me_update_authorities.csv')
m_df['seen'] = (-m_df.name.isin(m_df.name.tail(134).values)).astype(int)
m_df['lower_name'] = m_df.name.apply(lambda x: x.lower() )
seen = list(labels.lower_name.unique())
m_df['seen'] = m_df.lower_name.isin(seen).astype(int)
n_auth = m_df.groupby('update_authority').name.count().reset_index().rename(columns={'name':'n_auth'})
m_df = m_df.merge(n_auth)
len(m_df[m_df.seen == 0])
len(m_df[ (m_df.seen == 0) & (m_df.n_auth == 1)])
len(m_df[ (m_df.seen == 0) & (m_df.n_auth > 1)])
m_df.to_csv('~/Downloads/tmp-m_df.csv', index=False)
len(m_df.name.unique())
need = list(m_df[m_df.seen == 0].update_authority.unique())
need = list(m_df[ (m_df.seen == 0) & (m_df.n_auth == 1) ].update_authority.unique())
len(need)
# need = need + [
# need = [
# 'CDgbhX61QFADQAeeYKP5BQ7nnzDyMkkR3NEhYF2ETn1k' # taiyo
# , 'DC2mkgwhy56w3viNtHDjJQmc7SGu2QX785bS4aexojwX' # DAA
# , 'daaLrDfvcT4joui5axwR2gCkGAroruJFzyVsacU926g' # Degen Egg
# , 'BL5U8CoFPewr9jFcKf3kE1BhdFS1J59cwGpeZrm7ZTeP' # Skullbot
# , 'DRGNjvBvnXNiQz9dTppGk1tAsVxtJsvhEmojEfBU3ezf' # Boryoku
# , '7hYkx2CNGRB8JE7X7GefX1ak1dqe7GxgYKbpfj9moE9D' # mindfolk
# , 'CjwNEVQFKk8YzZLCvvw6sNrjxiQW8dYDSzhTph18T7g5' # jelly rascals
# , 'EcxEqUj4RNgdGJwPE3ktsM99Ea9ThPmXHUV5g37Qm4ju' # women monkey
# , 'EQSoRhbN9fEEYXKEE5Lg63Mqf17P3JydcWTvDhdMJW1N' # hydrascripts
# , '75CPiM9ywLgxhii9SQsNoA1SH3h66o5EhrYsazHR5Tqk' # hydrascripts
# , 'aury7LJUae7a92PBo35vVbP61GX8VbyxFKausvUtBrt' # aurory
# , 'ET3LWbEL6q4aUSjsX5xLyWktCwqKh6qsQE5j6TDZtZBY' # enviropass
# , '8ERR2gYrvXcJFuoNAbPRvHXtrJnAXXHgXKkVviwz9R6C' # enviroPass
# , 'GRDCbZBP1x2JxYf3rQQoPFGzF57LDPy7XtB1gEMaCqGV' # Space Robots
# , 'GenoS3ck8xbDvYEZ8RxMG3Ln2qcyoAN8CTeZuaWgAoEA' # Genopet
# , 'STEPNq2UGeGSzCyGVr2nMQAzf8xuejwqebd84wcksCK' # stepn
# , 'HcS8iaEHwUino8wKzcgC16hxHodnPCyacVYUdBaSZULP' # BASC
# , 'AvkbtawpmMSy571f71WsWEn41ATHg5iHw27LoYJdk8QA' # THUG
# , 'GH4QhJznKEHHv44AqEH5SUohkUauWyAFtu5u8zUWUKL4' # StepN Shoebox
# , 'FTQmhcD7SNBWrVxTgQMFr7xL2aA6adfAJJPBxGKU4VsZ' # Solstien
# ]
need = m_df[m_df.update_authority.isin(need)]
# m_df[m_df.lower_name.isin(seen)]
# m_df[-m_df.lower_name.isin(seen)]
# tmp = m_df[['update_authority','collection']].drop_duplicates().groupby(['update_authority']).collection.count().reset_index().rename(columns={'collection':'n_collection'})
# tmp = tmp.sort_values('n_collection', ascending=0)
# m_df = m_df.merge(tmp)
# m_df = m_df.sort_values(by=['n_collection','update_authority','collection'], ascending=[0,0,0])
l_df = pd.read_csv('./data/me_mints.csv')
fix = need.merge(l_df[[ 'name','mint_address' ]])
# len(need.name.unique())
# len(fix.name.unique())
# fix = fix.sort_values(by=['update_authority','collection'], ascending=[0,0])
# fix.head()
# seen = []
# data = []
# meta = []
# fix = fix[-(fix.name.isin(seen))]
# start = time.time()
# it = 0
# tot = len(fix)
# scraper = cloudscraper.create_scraper()
# # for each collection
# for row in fix.iterrows():
# row = row[1]
# print(row['name'])
# if row['name'] in seen:
# print('Seen')
# continue
# url = 'https://api.solscan.io/nft/detail?mint={}'.format(row['mint_address'])
# t = scraper.get(url).text
# j = json.loads(t)
# # r = requests.get(url)
# # j = r.json()
# j['data']
# if not j['success']:
# print('Error')
# print(r)
# print(j)
# sleep(1)
# continue
# update_authority = j['data']['updateAuthority']
# collectionId = j['data']['collectionId']
# collection = j['data']['collection']
# offset = 0
# limit = 500
# while True:
# print(offset)
# url = 'https://api.solscan.io/collection/nft?sortBy=nameDec&collectionId={}&offset={}&limit={}'.format(collectionId, offset, limit)
# r = requests.get(url)
# js = r.json()['data']
# offset += limit
# if len(js) == 0:
# break
# for j in js:
# data += [[ update_authority, collectionId, collection, row['symbol'], row['name'], row['collection'], j['info']['mint'] ]]
# m = j['info']['meta']
# m['mint_address'] = j['info']['mint']
# m['name'] = row['name']
# m['update_authority'] = update_authority
# meta += [ m ]
# it += 1
# end = time.time()
# print('Finished {} / {} in {} minutes'.format(it, tot, round((end - start) / 60.0, 1)))
# old = pd.read_csv('./data/nft_label_tokens.csv')
# token_df = pd.DataFrame(data, columns=['update_authority','collectionId','solscan_collection','symbol','name','collection','mint'])
# token_df = token_df.append(old).drop_duplicates()
# token_df.to_csv('./data/nft_label_tokens.csv', index=False)
# old = pd.read_csv('./data/nft_label_metadata.csv')
# meta_df = pd.DataFrame(meta)
# meta_df = meta_df.append(old).drop_duplicates()
# meta_df.to_csv('./data/nft_label_metadata.csv', index=False)
# seen = list(token_df.name.unique())
# m_df.to_csv('~/Downloads/tmp.csv', index=False)
# tmp[tmp.collection > 1]
# m_df.head()
# def f(x):
# x = re.sub('\(|\)', '', x)
# x = re.sub(' ', '_', x)
# x = re.sub('\'', '', x)
# return(x)
# m_df['collection'] = m_df.name.apply(lambda x: f(x) )
# x = 'asf (asf)'
# f(x)
# query = '''
# WITH base AS (
# SELECT *
# , ROW_NUMBER() OVER (PARTITION BY project_name ORDER BY insert_date DESC) AS rn
# FROM crosschain.address_labels
# WHERE blockchain = 'solana'
# AND label_subtype = 'nf_token_contract'
# )
# SELECT *
# FROM base
# '''
# examples = ctx.cursor().execute(query)
# examples = pd.DataFrame.from_records(iter(examples), columns=[x[0] for x in examples.description])
# examples = clean_colnames(examples)
# examples.head()
# examples[examples.address_name == 'paradisedao'].head()
# examples[examples.address == 'GUXSatf5AAFKmuQgSgn4GoGzBEhwJ9WAQRxeVt1vZvkb'].head()
# # m_df = pd.read_csv('./data/me_update_authorities.csv')
# # fix = m_df[m_df.n_collection > 1].merge(examples[[ 'address','address_name' ]].rename(columns={'address_name':'name'}) )
# fix = m_df[m_df.n_collection > 1].merge(examples[[ 'address','address_name' ]].rename(columns={'address_name':'name'}) )
# len(m_df[m_df.n_collection > 1].name.unique())
# len(fix.name.unique())
# j = list(fix.address.unique())
# with open('./data/fix_mints.json', 'w') as f:
# json.dump(j, f)
# seen = list(examples.address.unique())
# seen = []
# need = df[-df.mint_address.isin(seen)].sort_values(['collection','mint_address'])
# CDgbhX61QFADQAeeYKP5BQ7nnzDyMkkR3NEhYF2ETn1k - taiyo
# DC2mkgwhy56w3viNtHDjJQmc7SGu2QX785bS4aexojwX - DAA
# DRGNjvBvnXNiQz9dTppGk1tAsVxtJsvhEmojEfBU3ezf - Boryoku
# 7hYkx2CNGRB8JE7X7GefX1ak1dqe7GxgYKbpfj9moE9D - mindfolk
# CjwNEVQFKk8YzZLCvvw6sNrjxiQW8dYDSzhTph18T7g5 - mindfolk
need = fix.copy().rename(columns={'name':'collection'})
# need = need.drop_duplicates(subset=['update_authority']).sort_values('collection').head(7).tail(1)
need = need.drop_duplicates(subset=['update_authority']).sort_values('collection')
need['collection'] = need.collection.apply(lambda x: re.sub(r'\|', '-', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub(r'\)', '', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub(r'\(', '', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub(r"'", '', x).strip() )
need.collection.unique()
# need = need.drop_duplicates(subset=['collection']).sort_values('collection')
n = 0
# 1310 - 310
# need = need.tail(n).head(300).tail(25)
# need = need.tail(1009).head(17)
# need = need.tail(1009 - 17).head(17)
# 1-285, 1310-975
len(need)
# print(n)
mfiles = ['./data/mints/{}/{}_mint_accounts.json'.format(re.sub(' |-', '_', collection), update_authority) for collection, update_authority in zip(need.collection.values, need.update_authority.values) ]
seen = [ x for x in mfiles if os.path.exists(x) ]
seen = []
# for update authorities that have only 1 collection, we can just check metaboss once
rpc = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
# need = need.tail(400)
it = 0
tot = len(need)
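# for each update authority, shell out to metaboss to snapshot every mint address it controls
# into ./data/mints/<collection>/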
for row in need.iterrows():
it += 1
row = row[1]
collection = row['collection']
print('#{} / {}: {}'.format(it, tot, collection))
# if collection in seen:
# continue
update_authority = row['update_authority']
# print('Working on {}...'.format(collection))
collection_dir = re.sub(' |-', '_', collection)
dir = './data/mints/{}/'.format(collection_dir)
mfile = '{}{}_mint_accounts.json'.format(dir, update_authority)
if not os.path.exists(dir):
print(collection)
os.makedirs(dir)
# elif len(os.listdir(dir)) and os.path.exists(mfile):
# print('Already have {}.'.format(collection))
# print('Seen')
# continue
seen.append(update_authority)
os.system('metaboss -r {} -t 300 snapshot mints --update-authority {} --output {}'.format(rpc, update_authority, dir))
# write the mints to csv
data = []
for path in os.listdir('./data/mints/'):
if os.path.isdir('./data/mints/'+path):
collection = re.sub('_', ' ', path).strip()
for fname in os.listdir('./data/mints/'+path):
f = './data/mints/'+path+'/'+fname
if os.path.isfile(f) and '.json' in f:
with open(f) as file:
j = json.load(file)
for m in j:
data += [[ collection, m ]]
df = pd.DataFrame(data, columns=['collection','mint_address'])
df.collection.unique()
df.to_csv('./data/single_update_auth_labels.csv', index=False)
################################
# Multiple Authorities #
################################
rpc = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
need = list(m_df[ (m_df.seen == 0) & (m_df.n_auth > 1) ].update_authority.unique())
need = m_df[m_df.update_authority.isin(need)]
fix = need.merge(l_df[[ 'name','mint_address' ]])
need = fix.copy().rename(columns={'name':'collection'})
need = need.sort_values('collection').drop_duplicates(subset=['update_authority'], keep='first')
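# work through the multi-authority collections in batches of `sz`; `i` picks which batch to run
# and the final batch may be shorter than `sz`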
i = 5
sz = 112
t = len(need) - (sz * (i - 1)) if sz * i > len(need) else sz
print(t)
need = need.head(sz * i).tail(t)
# need = need.head(150 * 2).tail(150)
# need = need.head(150 * 3).tail(150)
# need = need.head(150 * 4).tail(150)
need['collection'] = need.collection.apply(lambda x: re.sub(r'\|', '-', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub(r'\)', '', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub(r'\(', '', x).strip() )
need['collection'] = need.collection.apply(lambda x: re.sub(r"'", '', x).strip() )
need.collection.unique()
it = 0
a = []
print(i)
for row in need.iterrows():
it += 1
# if it < 20:
# continue
# if it % 100 == 0:
# print('#{}/{}'.format(it, len(m_df)))
print('#{}/{}'.format(it, len(need)))
row = row[1]
collection = row['collection']
if collection in seen:
continue
update_authority = row['update_authority']
print('Working on {}...'.format(collection))
collection_dir = re.sub(' |-', '_', collection)
dir = './data/mints/{}/'.format(collection_dir)
mfile = '{}{}_mint_accounts.json'.format(dir, update_authority)
if not os.path.exists(dir):
print(collection)
os.makedirs(dir)
# elif len(os.listdir(dir)) and os.path.exists(mfile):
# print('Already have {}.'.format(collection))
# print('Seen')
# continue
print('LETS GOOO')
a.append(update_authority)
os.system('metaboss -r {} -t 300 snapshot mints --update-authority {} --output {}'.format(rpc, update_authority, dir))
# len(need)
# len(need.drop_duplicates(subset=['mint_address']))
# len(need.collection.unique())
# tot = len(need.collection.unique())
# it = 0
# # for each collection, get all the mints from metaboss
# for c in need.collection.unique():
# it += 1
# print('#{} / {}: {}'.format(it, tot, c))
# dir = './data/fix_labels_1/{}/'.format(re.sub(' ', '_', c))
odir = dir+'output/'
# if not os.path.exists(dir):
# print('Making dir {}'.format(dir))
# os.makedirs(dir)
if not os.path.exists(odir):
print('Making dir {}'.format(odir))
os.makedirs(odir)
# elif os.path.exists(dir+'mints.json'):
# print('Already Seen')
# continue
# ms = list(need[need.collection == c].mint_address.unique())
# with open(dir+'mints.json', 'w') as f:
# json.dump(ms, f)
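# decode the on-chain metadata (name, symbol, uri) for every mint listed in mfile into odir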
os.system('metaboss -r {} -t 300 decode mint --list-file {} --output {}'.format(rpc, mfile, odir ))
##################################################
# Load All The Mints for Each Collection #
##################################################
# now that we have the mints, create a data frame with the info for each mint in each collection
data = []
seen = [ x[1] for x in data ]
it = 0
dirs = os.listdir('./data/mints/')
for path in dirs:
print(it)
it += 1
if os.path.isdir('./data/mints/'+path):
collection = re.sub('_', ' ', path).strip()
if not os.path.exists('./data/mints/'+path+'/output/'):
continue
fnames = os.listdir('./data/mints/'+path+'/output/')
print(collection, len(fnames))
for fname in fnames:
f = './data/mints/'+path+'/output/'+fname
if fname[:-5] in seen:
continue
if os.path.isfile(f) and '.json' in f:
try:
with open(f) as file:
j = json.load(file)
data += [[ collection, fname, j['name'], j['symbol'], j['uri'] ]]
except:
print('Error {}'.format(fname[:-5]))
##################################################
#     Build A DataFrame Of The New Mints         #
##################################################
new_mints = pd.DataFrame(data, columns=['collection','mint_address','name','symbol','uri'])
# tmp = tmp[-(tmp.collection.isin(['Dskullys','Decimusdynamics']))]
n = len(new_mints[(new_mints.uri.isnull()) | (new_mints.uri == '')])
tot = len(new_mints)
pct = round(n * 100 / tot, 1)
print('{} ({}%) rows have no uri'.format(n, pct))
new_mints = new_mints[new_mints.uri != '']
# function to clean the name of each NFT (remove the number)
def f_cn(x):
if not x or x != x:
return(x)
if '#' in x[-6:]:
x = ''.join(re.split('#', x)[:-1]).strip()
elif bool(re.match(r'.+\s+[0-9]+', x)):
x = ' '.join(re.split(' ', x)[:-1]).strip()
return(x)
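# e.g. 'Degen Ape #1234' -> 'Degen Ape' and 'SolPunk 42' -> 'SolPunk' (illustrative names);
# anything without a trailing '#<num>' or ' <num>' is returned unchanged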
new_mints['clean_name'] = new_mints.name.apply(lambda x: f_cn(x) )
# determine, per collection, whether to key on collection-name-symbol, collection-symbol, or just collection
# when resolving which collection a mint actually belongs to: some collections reuse only a handful of NFT
# names (so we can iterate over the combos), while others give every NFT a unique name, in which case we
# assume the whole update authority is a single collection
a = new_mints.drop_duplicates(subset=['collection','clean_name','symbol']).groupby(['collection']).uri.count().reset_index().sort_values('uri', ascending=0)
symbol_only = a[a.uri > 10].collection.unique()
b = new_mints[new_mints.collection.isin(symbol_only)].drop_duplicates(subset=['collection','symbol']).groupby(['collection']).uri.count().reset_index().sort_values('uri', ascending=0)
collection_only = b[b.uri > 10].collection.unique()
# now get the info for each collection-name-symbol combo
g1 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].groupby(['collection','clean_name','symbol']).head(1).reset_index()
g2 = new_mints[ ((new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].groupby(['collection','symbol']).head(1).reset_index()
g3 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & ((new_mints.collection.isin(collection_only))) ].groupby(['collection']).head(1).reset_index()
g = g1.append(g2).append(g3).drop_duplicates(subset=['mint_address'])
print('{} Total: {} all, {} collection-symbol {} collection'.format(len(g), len(g1), len(g2), len(g3)))
g.to_csv('~/Downloads/tmp-g.csv', index=False)
# iterate over each row to get what collection they are actually in
# by pulling data from the uri
uri_data = []
it = 0
tot = len(g)
print(tot)
errs = []
seen = [ x['uri'] for x in uri_data ]
# for row in g.iterrows():
for row in g[ -(g.uri.isin(seen)) ].iterrows():
row = row[1]
it += 1
if it % 100 == 0:
uri_df = pd.DataFrame(uri_data)[[ 'collection','name','symbol','row_symbol','row_collection','uri','row_clean_name','mint_address' ]]
uri_df.to_csv('~/Downloads/uri_df.csv', index=False)
print('#{} / {}: {}'.format(it, tot, row['collection']))
try:
r = requests.get(row['uri'])
j = r.json()
j['uri'] = row['uri']
j['row_collection'] = row['collection']
j['row_clean_name'] = row['clean_name']
j['row_symbol'] = row['symbol']
j['mint_address'] = row['mint_address']
uri_data += [j]
except:
print('Error')
errs.append(row)
uri_df = pd.DataFrame(uri_data)[[ 'collection','name','symbol','row_symbol','row_collection','uri','row_clean_name','mint_address' ]]
uri_df.to_csv('~/Downloads/uri_df.csv', index=False)
# for each row, parse the json from the uri
uri_df = pd.read_csv('~/Downloads/uri_df.csv')
def f(x, c):
x = str(x)
try:
n = json.loads(re.sub("'", "\"", x))[c]
if type(n) == list:
return(n[0])
return(n)
except:
try:
return(json.loads(re.sub("'", "\"", x))[c])
except:
try:
return(json.loads(re.sub("'", "\"", x))[0][c])
except:
try:
return(json.loads(re.sub("'", "\"", x))[0])
except:
return(x)
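# e.g. f("{'name': 'Okay Bears', 'family': 'Okay Bears'}", 'name') -> 'Okay Bears' (illustrative value);
# if the string will not parse as JSON at all, the original value is returned unchanged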
# parse the json more
uri_df['parsed_collection'] = uri_df.collection.apply(lambda x: f(x, 'name') )
uri_df['parsed_family'] = uri_df.collection.apply(lambda x: f(x, 'family') )
uri_df['clean_name'] = uri_df.name.apply( lambda x: f_cn(x) )
# calculate what the collection name is
uri_df['use_collection'] = uri_df.parsed_collection.replace('', None).fillna( uri_df.clean_name )#.fillna( uri_df.row_symbol )
uri_df[uri_df.use_collection == 'nan'][['use_collection','parsed_collection','parsed_family','clean_name','name','collection','symbol','row_symbol','row_collection']].head()
uri_df[uri_df.use_collection == 'nan'][['use_collection','parsed_collection','parsed_family','clean_name','name','collection','symbol','row_symbol','row_collection']].to_csv('~/Downloads/tmp.csv', index=False)
len(uri_df)
# clean the collection name
def f1(x):
try:
if len(x['use_collection']) == 1:
return(x['clean_name'])
if bool(re.match(r'.+\s+#[0-9]+', x['use_collection'])):
return(''.join(re.split('#', x['use_collection'])[:-1]).strip())
if '{' in x['use_collection']:
return(x['clean_name'])
return(x['use_collection'].strip().title())
except:
return(x['use_collection'].strip().title())
uri_df['tmp'] = uri_df.apply(lambda x: f1(x), 1 )
uri_df[uri_df.tmp == 'Nan'][['use_collection','tmp']]
uri_df['use_collection'] = uri_df.apply(lambda x: f1(x), 1 )
sorted(uri_df.use_collection.unique())[:20]
sorted(uri_df.use_collection.unique())[-20:]
# clean the mint_address
uri_df['mint_address'] = uri_df.mint_address.apply(lambda x: re.sub('.json','', x))
uri_df.head()
uri_df = uri_df.fillna('None')
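# run the mapping counts twice: after the first pass, rows with multiple candidate collections fall
# back to parsed_family, so the second pass re-counts with those replacements in place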
for i in range(2):
# for each collection-name-symbol combo, see how many have multiple mappings
a = uri_df.copy().fillna('None')
a = a[['row_collection','row_clean_name','row_symbol','use_collection']].drop_duplicates().groupby(['row_collection','row_clean_name','row_symbol']).use_collection.count().reset_index().rename(columns={'use_collection':'n_1'})
uri_df = merge(uri_df, a, ensure=True)
# for each collection-symbol combo, see how many have multiple mappings
a = uri_df.copy().fillna('None')
a = a[['row_collection','row_symbol','use_collection']].drop_duplicates().groupby(['row_collection','row_symbol']).use_collection.count().reset_index().rename(columns={'use_collection':'n_2'})
uri_df = merge(uri_df, a, ensure=True)
# for each collection combo, see how many have multiple mappings
a = uri_df.copy().fillna('None')
a = a[['row_collection','use_collection']].drop_duplicates().groupby(['row_collection']).use_collection.count().reset_index().rename(columns={'use_collection':'n_3'})
uri_df = merge(uri_df, a, ensure=True)
uri_df['n'] = uri_df.apply(lambda x: x['n_3'] if x['row_collection'] in collection_only else x['n_2'] if x['row_collection'] in symbol_only else x['n_1'], 1 )
print('{} / {} ({}%) have multiple collection-name-symbol mappings'.format(len(uri_df[uri_df.n > 1]), len(uri_df), round( 100.0 * len(uri_df[uri_df.n > 1]) / len(uri_df))))
# if there is multiple, use the parsed_family instead of the use_collection
uri_df['use_collection'] = uri_df.apply(lambda x: x['use_collection'] if x['n'] == 1 else x['parsed_family'], 1 )
del uri_df['n_1']
del uri_df['n_2']
del uri_df['n_3']
# only take rows where there is a single mapping
m = uri_df[uri_df.n==1][[ 'use_collection','row_collection','row_clean_name','row_symbol' ]].dropna().drop_duplicates()
m.columns = [ 'use_collection','collection','clean_name','symbol' ]
m_1 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ].fillna('').merge(m.fillna(''), how='left')
m_2 = new_mints[ ((new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only))) ][[ 'collection','mint_address','symbol' ]].fillna('').merge(m.fillna(''), how='left')
m_3 = new_mints[ (-(new_mints.collection.isin(symbol_only))) & ((new_mints.collection.isin(collection_only))) ][[ 'collection','mint_address' ]].fillna('').merge(m.fillna(''), how='left')
len(m_1) + len(m_2) + len(m_3)
len(new_mints)
# m = new_mints.fillna('').merge(m.fillna(''), how='left')
m = m_1.append(m_2).append(m_3)
print('After all this, we have {}% of the mints'.format( round(len(m) * 100 / len(new_mints)) ))
len(new_mints)
len(m)
m['mint_address'] = m.mint_address.apply(lambda x: re.sub('.json', '', x) )
m = m[['mint_address','use_collection']].dropna().drop_duplicates()
m.columns = ['mint_address','collection']
m[m.collection.isnull()].head()
m[m.collection=='Nan'].head()
m = m[m.collection != 'Nan']
tmp = m.groupby('collection').mint_address.count().reset_index().sort_values('mint_address', ascending=0)
tmp.head()
m.to_csv('./data/mult_update_auth_labels.csv', index=False)
################
# DONE #
################
tokens = m.append(pd.read_csv('./data/tokens.csv')[['collection','mint_address']]).drop_duplicates(subset=['mint_address'], keep='last')
tokens.to_csv('./data/mints-2022-06-13-2pm.csv', index=False)
tokens.head()
m.to_csv('./data/mints-2022-06-09.csv', index=False)
m = pd.read_csv('./data/mints-2022-06-09.csv')
m.groupby('collection').head(1).to_csv('~/Downloads/tmp.csv', index=False)
len(m)
len(m.mint_address.unique())
m.head()
m.head()
# m = m.merge(symbol_map, how='left', on='symbol')
# m['use_collection'] = m.use_collection_x.fillna(m.use_collection_y)
len(new_mints)
len(m)
len(m[m.use_collection.isnull()])
len(m[m.use_collection.isnull()]) / len(m)
len(m[m.use_collection_x.isnull()]) / len(m)
m[m.use_collection.isnull()].fillna('').drop_duplicates(subset=['collection','clean_name','symbol']).to_csv('~/Downloads/tmp-3.csv', index=False)
m[m.use_collection.isnull()].drop_duplicates(subset=['collection']).to_csv('~/Downloads/tmp-3.csv', index=False)
a = uri_df[(uri_df.parsed_collection.isnull()) | (uri_df.parsed_collection == '')].groupby('row_clean_name').uri.count().reset_index()
a = uri_df[(uri_df.parsed_collection.isnull()) | (uri_df.parsed_collection == '')]
uri_df.head()
uri_df['row_clean_name'] = uri_df.row_clean_name.apply(lambda x: f_cn(x) )
id_map = uri_df
a.to_csv('~/Downloads/tmp-1.csv', index=False)
len(uri_df)
n = uri_df.groupby()
uri_df
uri_df.head()
uri_df[['symbol','collection','']]
uri_df.head()
query = '''
SELECT DISTINCT project_name
FROM crosschain.address_labels
WHERE blockchain = 'solana'
AND label_subtype = 'nf_token_contract'
AND project_name IS NOT NULL
'''
labels = ctx.cursor().execute(query)
labels = pd.DataFrame.from_records(iter(labels), columns=[x[0] for x in labels.description])
labels = clean_colnames(labels)
seen = [ x for x in m_df.collection.unique() if os.path.exists('./data/mints/{}/'.format(x)) and len(os.listdir('./data/mints/{}/'.format(x))) ]
seen = seen + [ re.sub('_', '', f(x.lower())) for x in labels.project_name.unique() ]
m_df = m_df[m_df.symbol.notnull()]
m_df['tmp'] = m_df.name.apply(lambda x: re.sub('_', '', f(x.lower())))
m_df[m_df.symbol == 'the_last_apes']
# m_df.to_csv('~/Downloads/tmp.csv', index=False)
len(m_df[m_df.tmp.isin(seen)])
[x for x in seen if not x in m_df.tmp.unique()][:11]
m_df[m_df.symbol == 'apesquad']
m_df[m_df.symbol == 'chimp_frens']
url = 'https://api.solscan.io/nft/detail?mint=D5pT5HYPeQkHD6ryoHxnc2jdcUMYmjs6sS6LswbSDsuy'
us = sorted(m_df[m_df.n_collection > 1].update_authority.unique())
u = us[1]
m_df[m_df.update_authority == u]
m_df[m_df.mint == 'G3xiAFZEp49BJc8nNrDJxwTXZ34teKH7CRf5KTGakxte']
data = []
for u in us[:10]:
nfts = BLOCKCHAIN_API_RESOURCE.search_nfts(
update_authority = u
, update_authority_search_method = SearchMethod.EXACT_MATCH
)
print(u, len(nfts))
for n in nfts:
m = n['nft_metadata']
data += [[ m['update_authority'], m['mint'], m['data']['symbol'], m['data']['name'] ]]
nft_df = pd.DataFrame(data, columns=['update_authority','mint','symbol','name'])
len(nft_df.update_authority.unique())
nft_df['collection'] = nft_df.name.apply(lambda x: re.split('#', x)[0].strip() )
nft_df.groupby(['symbol','collection']).mint.count()
nft_df.groupby(['symbol','name']).mint.count()
print(len(seen))
# m_df = m_df.merge(lp_df)
len(m_df)
it = 0
m_df = m_df[(-m_df.tmp.isin(seen)) & (-m_df.collection.isin(seen)) & (-m_df.name.isin(seen))]
rpc = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
len(seen)
for row in m_df.sort_values('collection').iterrows():
it += 1
# if it < 20:
# continue
if it % 100 == 0:
print('#{}/{}'.format(it, len(m_df)))
row = row[1]
collection = row['collection']
if collection in seen:
continue
update_authority = row['update_authority']
print('Working on {}...'.format(collection))
collection_dir = re.sub(' ', '_', collection)
dir = './data/mints/{}/'.format(collection_dir)
if not os.path.exists(dir):
os.makedirs(dir)
elif len(os.listdir(dir)):
# print('Already have {}.'.format(collection))
print('Seen')
continue
os.system('metaboss -r {} -t 300 snapshot mints --update-authority {} --output {}'.format(rpc, update_authority, dir))
# os.system('metaboss -r {} -t 300 derive metadata mints --update-authority {} --output {}'.format(rpc, update_authority, dir))
# fname = os.listdir(dir)
# if len(fname) == 1:
# fname = dir+fname[0]
# dir_mints = '{}mints/'.format(dir)
# if not os.path.exists(dir_mints):
# os.makedirs(dir_mints)
# os.system('metaboss -r {} -t 300 decode mint --list-file {} --output {}'.format(rpc, fname, dir_mints))
data = []
for path in os.listdir('./data/mints/'):
if os.path.isdir('./data/mints/'+path):
collection = re.sub('_', ' ', path).strip()
for fname in os.listdir('./data/mints/'+path):
f = './data/mints/'+path+'/'+fname
if os.path.isfile(f) and '.json' in f:
with open(f) as file:
j = json.load(file)
for m in j:
data += [[ collection, m ]]
df = pd.DataFrame(data, columns=['collection','mint_address'])
df = df[df.collection != 'etc']
# df = df.drop_duplicates(subset='mint_address')
df = df.drop_duplicates()
df['n'] = 1
g = df.groupby(['mint_address']).n.sum().reset_index()
g = g[g.n > 1]
len(g)
tmp_0 = g[['mint_address']].merge(df).groupby('collection').n.count().reset_index().sort_values('n', ascending=0)
tmp_0.head(20)
tmp_0.to_csv('~/Downloads/tmp.csv', index=False)
tmp = g.merge(df[[ 'collection','mint_address' ]])
tmp = tmp.sort_values(['mint_address','collection'])
tmp.collection.unique()
len(tmp.collection.unique())
len(df.collection.unique())
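# any collection that shares a mint address with another collection is dropped entirely before export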
rem = tmp.collection.unique()
df = df[-df.collection.isin(rem)]
df.to_csv('~/Downloads/solana_nft_tags.csv', index=False)
def mint_address_token_id_map_2():
old = pd.read_csv('./data/mint_address_token_id_map.csv')
old = pd.DataFrame()
mints = pd.read_csv('./data/solana_mints.csv')
data = []
for collection in [ 'Stoned Ape Crew','DeGods' ]:
for m in mints[mints.collection == collection].mint_address.unique():
pass
f = open('./data/mints/{}/{}.json'.format(collection, m))
j = json.load(f)
try:
token_id = int(re.split('#', j['name'])[1])
data += [[ collection, m, token_id, j['uri'] ]]
except:
print(m)
df = pd.DataFrame(data, columns=['collection','mint','token_id','uri'])
old = old.append(df).drop_duplicates()
print(old[old.token_id.notnull()].groupby('collection').token_id.count())
old.to_csv('./data/mint_address_token_id_map.csv', index=False)
def mint_address_token_id_map():
mints = pd.read_csv('./data/solana_mints.csv')
mints[mints.collection == 'Stoned Ape Crew'][['mint_address']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
mints[mints.collection == 'Degods'][['mint_address']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
mints[mints.collection == 'DeGods'][['mint_address']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
old = pd.read_csv('./data/mint_address_token_id_map.csv')
my_file = open('./scripts/solana-rpc-app/output.txt', 'r')
content = my_file.read()
my_file.close()
content_list = content.split('[')
data = []
for c in content_list:
s = re.split(',', c)
if len(s) > 1 and '#' in s[1]:
data += [[ re.split('"', s[0])[1], int(re.split('#', re.split('"', s[1])[1])[1]) ]]
df = pd.DataFrame(data, columns=['mint','token_id']).drop_duplicates()
df['collection'] = 'DeGods'
df.to_csv('./data/mint_address_token_id_map.csv', index=False)
def mint_address_token_id_map():
old = pd.read_csv('./data/mint_address_token_id_map.csv')
l0 = len(old)
tokens = pd.read_csv('./data/tokens.csv')[['collection','token_id','mint_address']].rename(columns={'mint_address':'mint'}).dropna()
tokens['uri'] = None
tokens = tokens[-tokens.collection.isin(old.collection.unique())]
old = old.append(tokens)
print('Adding {} rows'.format(len(old) - l0))
old.to_csv('./data/mint_address_token_id_map.csv', index=False)
def add_solana_sales():
print('Adding Solana sales...')
query = '''
WITH mints AS (
SELECT DISTINCT LOWER(mint) AS mint
, token_id
, project_name AS collection
FROM solana.dim_nft_metadata
WHERE mint IS NOT NULL
AND token_id IS NOT NULL
AND project_name IS NOT NULL
AND project_name IN (
'Astrals',
'Aurory',
'Cets on Creck',
'Catalina Whale Mixer',
'DeFi Pirates',
'DeGods',
'Degen Apes',
'Meerkat Millionaires',
'Okay Bears',
'Pesky Penguins',
'SOLGods',
'Solana Monkey Business',
'Stoned Ape Crew',
'Thugbirdz'
)
)
SELECT tx_id
, s.mint
, m.collection
, s.block_timestamp AS sale_date
, m.token_id
, sales_amount AS price
FROM solana.fact_nft_sales s
JOIN mints m ON LOWER(m.mint) = LOWER(s.mint)
WHERE block_timestamp >= CURRENT_DATE - 20
'''
sales = ctx.cursor().execute(query)
sales = pd.DataFrame.from_records(iter(sales), columns=[x[0] for x in sales.description])
sales = clean_colnames(sales)
len(sales)
len(sales.tx_id.unique())
m = sales[[ 'tx_id','collection','token_id','sale_date','price' ]]
m['sale_date'] = m.sale_date.apply(lambda x: str(x)[:19] )
old = pd.read_csv('./data/sales.csv')
go = old.groupby('collection').token_id.count().reset_index().rename(columns={'token_id':'n_old'})
l0 = len(old)
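# legacy rows in sales.csv may lack a tx_id: build a synthetic collection+token_id+date key,
# keep the row with a real tx_id when both exist, then fall back to the synthetic key as tx_id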
app = old[old.collection.isin(m.collection.unique())].append(m)
app['tmp'] = app.apply(lambda x: x['collection']+str(int(float(x['token_id'])))+x['sale_date'][:10], 1 )
if len(app[app.tx_id.isnull()]):
app['null_tx'] = app.tx_id.isnull().astype(int)
app = app.sort_values('null_tx', ascending=1)
app = app.drop_duplicates(subset=['tmp'], keep='first')
app['tx_id'] = app.tx_id.fillna(app.tmp)
old = old[-old.collection.isin(m.collection.unique())]
app = app.drop_duplicates(subset=['tx_id'])
old = old.append(app)
old = old[[ 'collection','token_id','sale_date','price','tx_id' ]]
old['token_id'] = old.token_id.astype(str)
# old = old.drop_duplicates(subset=['tx_id'])
# old[old.tx_id.isnull()]
# check changes
l1 = len(old)
gn = old.groupby('collection').token_id.count().reset_index().rename(columns={'token_id':'n_new'})
g = gn.merge(go, how='outer', on=['collection']).fillna(0)
g['dff'] = g.n_new - g.n_old
g = g[g.dff != 0].sort_values('dff', ascending=0)
print(g)
print('Added {} sales'.format(l1 - l0))
old.to_csv('./data/sales.csv', index=False)
return(old)
def add_eth_sales():
print('Adding ETH sales...')
query = '''
SELECT project_name
, token_id
, block_timestamp AS sale_date
, price
, tx_id
FROM ethereum.nft_events
WHERE project_name IN (
'BoredApeYachtClub'
, 'MutantApeYachtClub'
, 'BoredApeKennelClub'
)
'''
sales = ctx.cursor().execute(query)
sales = pd.DataFrame.from_records(iter(sales), columns=[x[0] for x in sales.description])
sales = clean_colnames(sales)
# print('Queried {} sales'.format(len(sales)))
# sales['chain'] = 'Ethereum'
sorted(sales.project_name.unique())
sales['collection'] = sales.project_name.apply(lambda x: clean_name(x) )
# print(sales.groupby('collection').sale_date.max())
sorted(sales.collection.unique())
# m = sales.merge(id_map, how='left', on=['mint','collection'])
# m = sales.merge(id_map, how='inner', on=['mint','collection'])
# m.sort_values('collection')
m = sales[[ 'collection','token_id','sale_date','price','tx_id' ]]
old = pd.read_csv('./data/sales.csv')
l0 = len(old)
old = old[old.collection != 'Bakc']
old = old[-old.collection.isin(sales.collection.unique())]
old = old.append(m)
# print(old.groupby('collection').token_id.count())
l1 = len(old)
print('Added {} sales'.format(l1 - l0))
old.to_csv('./data/sales.csv', index=False)
pass
def solana_metadata():
metadata = pd.read_csv('./data/metadata.csv')
metadata[metadata.collection == 'Solana Monkey Business'].feature_name.unique()
metadata = metadata[ metadata.collection.isin(['Aurory', 'Degen Apes', 'Galactic Punks', 'Pesky Penguins', 'Solana Monkey Business', 'Thugbirdz']) ]
collection = 'Solana Monkey Business'
for collection in metadata.collection.unique():
cur = metadata[metadata.collection == collection].fillna('None')
cur['token_id'] = cur.token_id.astype(int)
pct = cur[['token_id']].drop_duplicates()
pct['pct'] = 1
num_tokens = len(cur.token_id.unique())
print('Working on {} with {} tokens'.format(collection, num_tokens))
min(cur.token_id)
max(cur.token_id)
ps = pd.DataFrame()
for c in cur.feature_name.unique():
# if c in [ 'Attribute Count' ]:
# continue
g = cur[cur.feature_name == c].groupby('feature_value').token_id.count().reset_index()
g['cur_pct'] = (g.token_id / num_tokens)
g = cur[cur.feature_name == c].merge(g[[ 'feature_value', 'cur_pct' ]] )
ps = ps.append(g[['token_id','cur_pct']])
pct = pct.merge(g[['token_id', 'cur_pct']])
pct['pct'] = pct.pct * pct.cur_pct * pct.cur_pct
del pct['cur_pct']
ps['rk'] = ps.groupby('token_id').cur_pct.rank(ascending=0)
ps[ps.token_id == 1355]
mn = ps.rk.min()
mx = ps.rk.max()
ps['mult'] = ps.apply(lambda x: x['cur_pct'] ** (1 + (x['rk'] / (mx - mn)) ), 1 )
def run_queries():
for c in [ 'Levana Dragon Eggs','Levana Meteors','Levana Dust' ][1:]:
print(c)
with open('./metadata/sql/{}.txt'.format(c)) as f:
query = f.readlines()
metadata = ctx.cursor().execute(' '.join(query))
metadata = pd.DataFrame.from_records(iter(metadata), columns=[x[0] for x in metadata.description])
metadata = clean_colnames(metadata)
metadata['image'] = metadata.image.apply(lambda x: 'https://cloudflare-ipfs.com/ipfs/'+re.split('/', x)[-1] )
metadata['collection'] = c
metadata['chain'] = 'Terra'
list(metadata.image.values[:2]) + list(metadata.image.values[-2:])
metadata.to_csv('./data/metadata/{}.csv'.format(c), index=False)
def add_terra_tokens():
# galactic punks
query = '''
SELECT msg_value:execute_msg:mint_nft:token_id AS token_id
, msg_value:execute_msg:mint_nft:extension:name AS name
, msg_value:execute_msg:mint_nft:extension:image AS image
FROM terra.msgs
WHERE msg_value:contract::string = 'terra16wuzgsx3tz4hkqu73q5s7unxenefkkvefvewsh'
AND tx_status = 'SUCCEEDED'
AND msg_value:execute_msg:mint_nft is not null
'''
tokens = ctx.cursor().execute(query)
tokens = pd.DataFrame.from_records(iter(tokens), columns=[x[0] for x in tokens.description])
tokens = clean_colnames(tokens)
len(tokens)
for c in tokens.columns:
tokens[c] = tokens[c].apply(lambda x: re.sub('"', '', x) )
collection = 'Levana Dragon Eggs'
collection = 'Galactic Angels'
for collection in [ 'Galactic Punks', 'LunaBulls', 'Levana Dragon Eggs' ]:
if collection == 'Galactic Punks':
df = tokens
df['image_url'] = df.image.apply(lambda x: 'https://ipfs.io/ipfs/'+re.split('/', x)[-1] )
else:
df = pd.read_csv('./data/metadata/{}.csv'.format(collection))
df = clean_colnames(df).rename(columns={'tokenid':'token_id'})
df['collection'] = collection
if collection == 'LunaBulls':
df['image_url'] = df.ipfs_image
elif 'image' in df.columns:
df['image_url'] = df.image
df['clean_token_id'] = df.name.apply(lambda x: re.split('#', x)[1] ).astype(int)
df['collection'] = collection
df['chain'] = 'Terra'
old = pd.read_csv('./data/tokens.csv')
old = old[ -(old.collection == collection) ]
old = old.drop_duplicates(subset=['collection','token_id'], keep='first')
df['market_url'] = df.apply(lambda x: '' if x['chain'] == 'Solana' else 'https://randomearth.io/items/{}_{}'.format( d_market[x['collection']], x['token_id'] ), 1)
df = df[list(old.columns)]
old = old.append(df)
print(old.groupby('collection').clean_token_id.count())
old.to_csv('./data/tokens.csv', index=False)
def add_terra_metadata():
query = '''
SELECT CASE
WHEN contract_address = 'terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v' THEN 'Levana Dragons'
WHEN contract_address = 'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2' THEN 'LunaBulls'
WHEN contract_address = 'terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k' THEN 'Galactic Punks'
ELSE 'Other'
END AS collection
, token_id
, token_metadata:traits AS traits
FROM terra.nft_metadata
WHERE contract_address in (
'terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v'
, 'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2'
, 'terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k'
)
AND token_metadata:traits IS NOT NULL
'''
db_metadata = ctx.cursor().execute(query)
db_metadata = pd.DataFrame.from_records(iter(db_metadata), columns=[x[0] for x in db_metadata.description])
db_metadata = clean_colnames(db_metadata)
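# Galactic Punks traits are parsed out of the on-chain terra.nft_metadata JSON; the other
# collections are read from the local CSVs under ./data/metadata/ (see run_queries)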
collection = 'Levana Dragon Eggs'
collection = 'Galactic Angels'
for collection in [ 'Galactic Punks', 'LunaBulls', 'Levana Dragon Eggs' ]:
if collection == 'Galactic Punks':
cur = db_metadata[ db_metadata.collection == collection ]
data = []
for row in cur.iterrows():
row = row[1]
trait_names = [ re.split('"', re.split(':', x)[0])[1] for x in re.split(',', row['traits'])]
trait_values = [ re.split('"', re.split(':', x)[1])[1] for x in re.split(',', row['traits'])]
d = {'collection':row['collection'], 'token_id':row['token_id']}
for n, v in zip(trait_names, trait_values):
d[n] = v
data += [d]
metadata = pd.DataFrame(data)
else:
metadata = pd.read_csv('./data/metadata/{}.csv'.format(collection))
metadata.columns = [ x.lower() for x in metadata.columns ]
if 'Levana' in collection:
metadata = metadata.rename(columns={'rank':'collection_rank'})
metadata = clean_colnames(metadata).rename(columns={'tokenid':'token_id'})
cols = [ c for c in metadata.columns if not c in [ 'block_timestamp','block_id','tx_id','collection','chain','name','image','token_name' ] ]
metadata = metadata[cols]
metadata['collection'] = collection
none_col = 'None'
metadata = metadata.fillna(none_col)
for c in [ x for x in metadata.columns if type(metadata[x].values[0])==str]:
metadata[c] = metadata[c].apply(lambda x: re.sub('"', '', x) )
if collection == 'Galactic Punks':
glitches = [ 'messy pink','messy blue','ponytail red','messy brown','neat brown','ponytail black','neat red','messy blonde','neat black','neat blonde','ponytail blonde' ]
metadata['glitch_trait'] = metadata.hair.apply(lambda x: 'Yes' if x in glitches else 'No' )
metadata['pct'] = 1
metadata['attribute_count'] = 0
l = len(metadata)
incl_att_count = not collection in [ 'Levana Dragon Eggs' ]
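# pct ends up as the product of each trait value's frequency (rarer combos -> smaller pct), and
# attribute_count tallies non-None traits; the count is skipped for Levana Dragon Eggs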
for c in list(metadata.columns) + ['attribute_count']:
if c in ['token_id','collection','pct','levana_rank','meteor_id']:
continue
if c == 'attribute_count' and not incl_att_count:
continue
metadata[c] = metadata[c].apply(lambda x: re.sub('_', ' ', x).title() if x==x and type(x) == str else x )
g = metadata.groupby(c).token_id.count().reset_index()
g['cur_pct'] = g.token_id / l
metadata = metadata.merge(g[[c, 'cur_pct']])
metadata['pct'] = metadata.pct * metadata.cur_pct
if incl_att_count and not c in ['attribute_count','glitch_trait']:
metadata['attribute_count'] = metadata.attribute_count + metadata[c].apply(lambda x: int(x != none_col) )
del metadata['cur_pct']
# cur = metadata[[ 'collection','token_id', c ]].rename(columns={c: 'feature_value'})
# cur['feature_name'] = c
# m = m.append(cur)
if incl_att_count:
metadata.groupby('attribute_count').token_id.count().reset_index()
# metadata.groupby(['rarity','attribute_count']).token_id.count().reset_index()
# metadata.groupby('backgrounds').token_id.count().reset_index().token_id.sum()
# metadata.sort_values('pct_rank')
metadata.sort_values('pct')
metadata['nft_rank'] = metadata.pct.rank()
# metadata['rarity_score'] = metadata.pct.apply(lambda x: 1.0 / (x**0.07) )
# mn = metadata.rarity_score.min()
# mx = metadata.rarity_score.max()
# metadata = metadata.sort_values('token_id')
# metadata['rarity_score'] = metadata.rarity_score.apply(lambda x: ((x - mn) * 99 / (mx - mn)) + 1)
# metadata['rarity_score_rank'] = metadata.rarity_score.rank(ascending=0, method='first').astype(int)
# metadata.sort_values('rarity_score', ascending=0).head(20)[['token_id','collection_rank','rarity_score','rarity_score_rank']]
# metadata.sort_values('rarity_score', ascending=0).tail(20)[['token_id','collection_rank','rarity_score']]
# len(metadata[metadata.rarity_score<=2.4]) / len(metadata)
# metadata[metadata.token_id==6157].sort_values('rarity_score', ascending=0).tail(20)[['token_id','collection_rank','rarity_score','rank']]
# metadata[metadata['rank']>=3000].groupby('weight').token_id.count()
# metadata.rarity_score.max()
# metadata.rarity_score.min()
# metadata.sort_values('rank')[['rank','pct','rarity_score']]
m = pd.DataFrame()
for c in metadata.columns:
if c in [ 'token_id','collection' ]:
continue
cur = metadata[[ 'token_id','collection', c ]].rename(columns={c: 'feature_value'})
cur['feature_name'] = c
m = m.append(cur)
m['chain'] = 'Terra'
m.groupby('feature_name').feature_value.count()
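    # Levana Dragon Eggs: derive extra features from the raw collection_rank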
if collection == 'Levana Dragon Eggs':
add = m[m.feature_name=='collection_rank']
add['feature_name'] = 'transformed_collection_rank'
mx = add.feature_value.max()
mn = add.feature_value.min()
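        # double-exponential transform of collection_rank: the best rank maps to ~1.55
        # and the worst to roughly 330, growing double-exponentially with rank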
add['feature_value'] = add.feature_value.apply(lambda x: 1.42**(1.42**(8*(x-mn)/(mx-mn))) + 0.13)
# add['tmp'] = add.feature_value.rank() * 10 / len(add)
# add['tmp'] = add.tmp.astype(int)
# add.groupby('tmp').feature_value.mean()
m = m.append(add)
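        # also bucket collection_rank into groups of 1,000 ranks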
add = m[m.feature_name=='collection_rank']
add['feature_name'] = 'collection_rank_group'
add['feature_value'] = add.feature_value.apply(lambda x: int(x/1000))
m = m.append(add)
g = m.groupby('feature_value').feature_name.count().reset_index().sort_values('feature_name').tail(50)
old = pd.read_csv('./data/metadata.csv')
m['feature_name'] = m.feature_name.apply(lambda x: re.sub('_', ' ', x).title() )
m['feature_value'] = m.feature_value.apply(lambda x: re.sub('_', ' ', x).title() if type(x) == str else x )
l0 = len(old)
if not 'chain' in old.columns:
old['chain'] = old.collection.apply(lambda x: 'Terra' if x in [ 'Galactic Punks', 'LunaBulls' ] else 'Solana' )
old = old[-old.collection.isin(m.collection.unique())]
old = old.append(m)
old = old.drop_duplicates(subset=['collection','token_id','feature_name'])
old = old[-(old.feature_name.isin(['last_sale']))]
# print(old.groupby(['chain','collection']).token_id.count())
print(old[['chain','collection','token_id']].drop_duplicates().groupby(['chain','collection']).token_id.count())
l1 = len(old)
print('Adding {} rows'.format(l1 - l0))
old.to_csv('./data/metadata.csv', index=False)
def add_terra_sales():
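    # Pull the last 3 days of Terra NFT sales (Random Earth, Knowhere, Luart) from
    # Snowflake, normalize the columns, and merge them into ./data/sales.csv.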
print('Adding Terra sales')
query = '''
WITH
RE_events AS (
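        -- Random Earth 'execute_orders' events from the last 3 days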
SELECT
block_timestamp,
tx_id,
event_attributes
FROM
terra.msg_events
WHERE event_attributes:action = 'execute_orders'
AND event_type = 'from_contract'
AND tx_status = 'SUCCEEDED'
AND block_timestamp >= CURRENT_DATE - 3
),
RE_takers AS (
SELECT DISTINCT
tx_id,
msg_value:sender as taker
FROM
terra.msgs
WHERE
tx_id IN (SELECT DISTINCT tx_id FROM RE_events)
AND block_timestamp >= CURRENT_DATE - 3
),
allSales AS
(
SELECT
block_timestamp,
tx_id,
platform,
nft_from,
nft_to,
nft_address,
CASE nft_address
WHEN 'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2' THEN 'LunaBulls'
WHEN 'terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k' THEN 'Galactic Punks'
WHEN 'terra1vhuyuwwr4rkdpez5f5lmuqavut28h5dt29rpn6' THEN 'Levana Dragons'
WHEN 'terra1p70x7jkqhf37qa7qm4v23g4u4g8ka4ktxudxa7' THEN 'Levana Dust'
WHEN 'terra1k0y373yxqne22pc9g7jvnr4qclpsxtafevtrpg' THEN 'Levana Eggs'
WHEN 'terra14gfnxnwl0yz6njzet4n33erq5n70wt79nm24el' THEN 'Levana Loot'
WHEN 'terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v' THEN 'Levana Meteors'
WHEN 'terra13nccm82km0ttah37hkygnvz67hnvkdass24yzv' THEN 'Galactic Angels'
ELSE nft_address END
as nft,
amount,
denom,
tokenid
FROM (
SELECT
block_timestamp,
tx_id,
'Random Earth' as platform,
action,
IFF(action = 'SELL', maker, taker) as nft_from,
IFF(action = 'ACCEPT BID', maker, taker) as nft_to,
nft_address,
amount,
denom,
tokenid
FROM (
SELECT
block_timestamp,
e.tx_id,
action,
maker,
taker,
nft_address,
amount,
denom,
tokenid
FROM (
SELECT
block_timestamp,
tx_id,
IFF(event_attributes:order:order:maker_asset:info:nft is not null, 'SELL', 'ACCEPT BID') as action,
LISTAGG(CHR(F.VALUE)) WITHIN GROUP (ORDER BY F.INDEX) as maker,
IFF(event_attributes:order:order:maker_asset:info:nft is not null, event_attributes:order:order:maker_asset:info:nft:contract_addr, event_attributes:order:order:taker_asset:info:nft:contract_addr)::string as nft_address,
IFF(event_attributes:order:order:maker_asset:info:nft is not null, event_attributes:order:order:taker_asset:amount, event_attributes:order:order:maker_asset:amount) / 1e6 as amount,
IFF(event_attributes:order:order:maker_asset:info:nft is not null, event_attributes:order:order:taker_asset:info:native_token:denom, event_attributes:order:order:maker_asset:info:native_token:denom)::string as denom,
IFF(event_attributes:order:order:maker_asset:info:nft is not null, event_attributes:order:order:maker_asset:info:nft:token_id, event_attributes:order:order:taker_asset:info:nft:token_id) as tokenid
FROM
RE_events e,
LATERAL FLATTEN(input => event_attributes:order:order:maker) F
GROUP BY
block_timestamp,
tx_id,
nft_address,
amount,
denom,
tokenid,
action
) e
JOIN RE_takers t
ON e.tx_id = t.tx_id
)
UNION
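        -- Knowhere: auction settlements (settle + transfer_nft)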
SELECT
block_timestamp,
tx_id,
'Knowhere' as platform,
MAX(IFF(event_attributes:bid_amount is not null, 'SELL', 'AUCTION')) as action,
MAX(IFF(event_type = 'coin_received', COALESCE(event_attributes:"2_receiver", event_attributes:"1_receiver"), '')) as nft_from,
MAX(IFF(event_attributes:"0_action" = 'settle' AND event_attributes:"1_action" = 'transfer_nft', event_attributes:recipient, '')) as nft_to,
MAX(IFF(event_attributes:"1_action" is not null, event_attributes:"1_contract_address", ''))::string as nft_address,
MAX(IFF(event_type = 'coin_received', COALESCE(NVL(event_attributes:"0_amount"[0]:amount,0) + NVL(event_attributes:"1_amount"[0]:amount,0) + NVL(event_attributes:"2_amount"[0]:amount, 0), event_attributes:amount[0]:amount), 0)) / 1e6 as amount,
MAX(IFF(event_type = 'coin_received', COALESCE(event_attributes:"0_amount"[0]:denom, event_attributes:amount[0]:denom), ''))::string as denom,
MAX(IFF(event_type = 'wasm', event_attributes:token_id, 0)) as tokenid
FROM
terra.msg_events
WHERE
tx_status = 'SUCCEEDED'
AND block_timestamp >= CURRENT_DATE - 3
AND tx_id IN (
SELECT DISTINCT
tx_id
FROM terra.msgs
WHERE
msg_value:execute_msg:settle:auction_id is not null
AND tx_status = 'SUCCEEDED'
AND msg_value:contract = 'terra12v8vrgntasf37xpj282szqpdyad7dgmkgnq60j'
AND block_timestamp >= CURRENT_DATE - 3
)
GROUP BY
block_timestamp,
tx_id
UNION
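        -- Luart: execute_order sales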
SELECT
block_timestamp,
tx_id,
'Luart' as platform,
UPPER(event_attributes:order_type) as action,
event_attributes:order_creator as nft_from, -- for sells, no info about other types yet
event_attributes:recipient as nft_to,
event_attributes:nft_contract_address as nft_address,
event_attributes:price / 1e6 as amount,
event_attributes:denom::string as denom,
event_attributes:"0_token_id" as tokenid
FROM terra.msg_events
WHERE
event_type = 'from_contract'
AND event_attributes:action = 'transfer_nft'
AND event_attributes:method = 'execute_order'
AND event_attributes:"0_contract_address" = 'terra1fj44gmt0rtphu623zxge7u3t85qy0jg6p5ucnk'
AND block_timestamp >= CURRENT_DATE - 3
)
WHERE nft_address IN (
'terra13nccm82km0ttah37hkygnvz67hnvkdass24yzv',
'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2',
'terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k',
'terra1vhuyuwwr4rkdpez5f5lmuqavut28h5dt29rpn6',
'terra1p70x7jkqhf37qa7qm4v23g4u4g8ka4ktxudxa7',
'terra1k0y373yxqne22pc9g7jvnr4qclpsxtafevtrpg',
'terra14gfnxnwl0yz6njzet4n33erq5n70wt79nm24el',
'terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v'
)
)
select * from allsales
'''
sales = ctx.cursor().execute(query)
sales = pd.DataFrame.from_records(iter(sales), columns=[x[0] for x in sales.description])
sales = clean_colnames(sales)
# tokens = pd.read_csv('./data/tokens.csv')
# tokens['tmp'] = tokens.token_id.apply(lambda x: (str(x)[:5]))
# tokens[tokens.collection == 'Galactic Punks'].to_csv('~/Downloads/tmp.csv', index=False)
sales.tokenid.values[:4]
sales['tokenid'] = sales.tokenid.apply(lambda x: str(int(float(x))) )
# tokens['token_id'] = tokens.token_id.astype(str)
# s = sales[sales.nft == 'Galactic Punks']
# t = tokens[tokens.collection == 'Galactic Punks'].token_id.values
# s[s.tokenid.isin(t)]
sales = sales.rename(columns={
'nft':'collection'
, 'block_timestamp': 'sale_date'
, 'amount': 'price'
, 'tokenid': 'token_id'
})
sales = clean_token_id(sales)
assert(len(sales[sales.price.isnull()]) == 0)
old = pd.read_csv('./data/sales.csv')
go = old.groupby('collection').token_id.count().reset_index().rename(columns={'token_id':'n_old'})
l0 = len(old)
app = old[ (old.collection.isin(sales.collection.unique())) ].append(sales)
assert(len(app[app.tx_id.isnull()]) == 0)
app = app.drop_duplicates('tx_id')
old = old[ -(old.collection.isin(sales.collection.unique())) ]
old = old.append(app)
old = old[[ 'collection','token_id','sale_date','price','tx_id' ]]
# check changes
l1 = len(old)
gn = old.groupby('collection').token_id.count().reset_index().rename(columns={'token_id':'n_new'})
g = gn.merge(go, how='outer', on=['collection']).fillna(0)
g['dff'] = g.n_new - g.n_old
g = g[g.dff != 0].sort_values('dff', ascending=0)
print(g)
print('Added {} sales'.format(l1 - l0))
old.to_csv('./data/sales.csv', index=False)
return(old)
def rarity_tools(browser):
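    # Page through a collection on rarity.tools with a Selenium browser, scrape each
    # token's rank and image URL, and merge the results into ./data/tokens.csv.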
data = []
    # collection = 'boredapeyachtclub'
    # collection = 'mutant-ape-yacht-club'
    collection = 'bored-ape-kennel-club'
url = 'https://rarity.tools/{}'.format(collection)
browser.get(url)
for i in range(201):
print(i, len(data))
sleep(0.1)
        soup = BeautifulSoup(browser.page_source, 'html.parser')
for div in soup.find_all('div', class_='bgCard'):
rk = div.find_all('div', class_='font-extrabold')
img = div.find_all('img')
if len(rk) and len(img):
# try:
rk = int(just_float(rk[0].text))
img_url = re.split('\?', img[0].attrs['src'])[0]
token_id = int(re.split('/|\.', img_url)[6])
data += [[ collection, token_id, img_url, rk ]]
# except:
# pass
# bs = browser.find_elements_by_class_name('smallBtn')
browser.find_elements_by_class_name('smallBtn')[4 + int(i > 0)].click()
sleep(0.1)
# for i in range(len(bs)):
# print(i, browser.find_elements_by_class_name('smallBtn')[i].text)
df = pd.DataFrame(data, columns=['collection','token_id','image_url','nft_rank']).drop_duplicates()
len(df)
df['chain'] = 'Ethereum'
df['clean_token_id'] = df.token_id
df['collection'] = df.collection.apply(lambda x: clean_name(x) )
len(df)
old = pd.read_csv('./data/tokens.csv')
l0 = len(old)
old = old[-old.collection.isin(df.collection.unique())]
old = old.append(df)
l1 = len(old)
print('Added {} rows'.format(format_num(l1 - l0)))
old.tail()
old[old.chain == 'Ethereum'].collection.unique()
old.to_csv('./data/tokens.csv', index=False)
def eth_metadata_api():
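    # Exploratory loader: fetch token attributes and image URLs from the BAYC/MAYC
    # metadata APIs and fold them into ./data/metadata.csv and ./data/tokens.csv.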
old = pd.read_csv('./data/metadata.csv')
    # collection = 'BAYC'
    collection = 'MAYC'
    # seen = []
    seen = sorted(old[old.collection == collection].token_id.unique())
a_data = []
t_data = []
errs = []
# for i in range(10000):
it = 0
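    # `ids` is the list of token ids whose metadata still needs (re)fetching; it is
    # built further down from tokens whose image_url points at nft-media, so populate
    # it before running this loop.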
for i in ids[21:]:
sleep(.1)
it += 1
if it % 1 == 0:
print(i, len(t_data), len(a_data), len(errs))
if i in seen:
continue
# try:
url = 'https://boredapeyachtclub.com/api/mutants/{}'.format(i)
try:
j = requests.get(url).json()
t_data += [[ i, j['image'] ]]
for a in j['attributes']:
a_data += [[ i, a['trait_type'], a['value'] ]]
except:
print('Re-trying once...')
sleep(30)
try:
j = requests.get(url).json()
t_data += [[ i, j['image'] ]]
for a in j['attributes']:
a_data += [[ i, a['trait_type'], a['value'] ]]
except:
print('Re-trying twice...')
sleep(30)
j = requests.get(url).json()
t_data += [[ i, j['image'] ]]
for a in j['attributes']:
a_data += [[ i, a['trait_type'], a['value'] ]]
# errs.append(i)
new_mdf = pd.DataFrame(a_data, columns=['token_id','feature_name','feature_value'])
new_mdf['collection'] = 'MAYC'
new_mdf['chain'] = 'Ethereum'
old = old.append(new_mdf)
old.to_csv('./data/metadata.csv', index=False)
new_tdf = pd.DataFrame(t_data, columns=['token_id','image_url'])
new_tdf['collection'] = 'MAYC'
m = pd.read_csv('./data/metadata.csv')
old = pd.read_csv('./data/tokens.csv')
l0 = len(old)
old = old.merge(new_tdf, on=['collection', 'token_id'], how='left')
old[old.image_url_y.notnull()]
old[old.image_url_y.notnull()][['image_url_x','image_url_y']]
old['image_url'] = old.image_url_y.fillna(old.image_url_x)
del old['image_url_x']
del old['image_url_y']
l1 = len(old)
print('Adding {} rows'.format(l1 - l0))
old.to_csv('./data/tokens.csv', index=False)
tmp = old[old.collection == 'MAYC']
tmp['tmp'] = tmp.image_url.apply(lambda x: int('nft-media' in x) )
tmp[tmp.tmp == 1].merge(m[['token_id']].drop_duplicates())[['token_id']].drop_duplicates()
ids = tmp[tmp.tmp == 1].merge(m[['token_id']].drop_duplicates()).token_id.unique()
a = old[old.collection == 'MAYC'].token_id.unique()
b = new_tdf.token_id.unique()
[x for x in b if not x in a]
    # NOTE: `old` holds tokens.csv at this point, so appending the trait rows again
    # here would write token data into metadata.csv; the append already happened above.
    # new_mdf['collection'] = 'MAYC'
    # new_mdf['chain'] = 'Ethereum'
    # old = old.append(new_mdf)
    # old.to_csv('./data/metadata.csv', index=False)
collection = 'BAYC'
data = []
for i in range(0, 1000):
if i % 100 == 1:
print(i, len(data))
url = 'https://ipfs.io/ipfs/QmeSjSinHpPnmXmspMjwiXyN6zS4E9zccariGR3jxcaWtq/{}'.format(i)
# try:
j = requests.get(url, verify=False, timeout=1).json()
data += [[ collection, i, j['image'] ]]
# except:
# print(i)
def eth_metadata():
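    # Read BAYC/MAYC trait metadata from ethereum.nft_metadata in Snowflake, refresh
    # image URLs in ./data/tokens.csv, and append long-format traits to ./data/metadata.csv.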
query = '''
SELECT contract_name
, token_id
, token_metadata:Background AS background
    , token_metadata:Clothes AS clothes
, token_metadata:Earring AS earring
, token_metadata:Eyes AS eyes
, token_metadata:Fur AS fur
, token_metadata:Hat AS hat
, token_metadata:Mouth AS mouth
, image_url
FROM ethereum.nft_metadata
WHERE contract_name IN ('MutantApeYachtClub','bayc')
'''
metadata = ctx.cursor().execute(query)
metadata = pd.DataFrame.from_records(iter(metadata), columns=[x[0] for x in metadata.description])
metadata = clean_colnames(metadata)
metadata['collection'] = metadata.contract_name.apply(lambda x: x[0].upper()+'AYC' )
metadata['image_url'] = metadata.image_url.apply(lambda x: 'https://ipfs.io/ipfs/{}'.format(re.split('/', x)[-1]) if 'ipfs' in x else x )
# metadata['image_url'] = metadata.tmp
old = pd.read_csv('./data/tokens.csv')
old = old.merge( metadata[[ 'collection','token_id','image_url' ]], how='left', on=['collection','token_id'] )
old[old.image_url_y.notnull()]
old['image_url'] = old.image_url_y.fillna(old.image_url_x)
del old['image_url_x']
del old['image_url_y']
del metadata['image_url']
old.to_csv('./data/tokens.csv', index=False)
ndf = pd.DataFrame()
e = [ 'contract_name', 'token_id', 'collection' ]
for c in [ c for c in metadata.columns if not c in e ]:
cur = metadata[['collection','token_id',c]]
cur.columns = [ 'collection','token_id','feature_value' ]
cur['feature_name'] = c.title()
cur.feature_value.unique()
cur['feature_value'] = cur.feature_value.apply(lambda x: x[1:-1] if x else 'None' )
ndf = ndf.append(cur)
ndf = ndf.drop_duplicates()
ndf['chain'] = 'Ethereum'
g = ndf.groupby(['collection', 'feature_name', 'feature_value']).token_id.count().reset_index()
old = pd.read_csv('./data/metadata.csv')
old.head()
l0 = len(old)
old = old.append(ndf)
l1 = len(old)
print('Adding {} rows'.format(l1 - l0))
old.to_csv('./data/metadata.csv', index=False)
t_data = []
a_data = []
for i in range(10000):
if i % 100 == 1:
print(i, len(t_data), len(a_data))
token_id = i + 1
url = 'https://us-central1-bayc-metadata.cloudfunctions.net/api/tokens/{}'.format(i)
j = requests.get(url).json()
t_data += [[ token_id, j['image'] ]]
for a in j['attributes']:
a_data += [[ token_id, a['trait_type'], a['value'] ]]
    df = pd.DataFrame(t_data, columns=['token_id','image_url'])
######################################
# Grab Data From OpenSea API #
######################################
def load_api_data():
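    # Page through the OpenSea assets API for a contract, collect image URLs and
    # traits, save them to CSV, then download images for previously scraped tokens.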
headers = {
'Content-Type': 'application/json'
, 'X-API-KEY': '2b7cbb0ebecb468bba431aefb8dbbebe'
}
data = []
traits_data = []
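    # Moonbirds contract address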
contract_address = '0x23581767a106ae21c074b2276d25e5c3e136a68b'
# url = 'https://api.opensea.io/api/v1/assets?asset_contract_address=0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d&limit=20&token_ids=8179'
# for o in [ 'asc', 'desc' ]:
for o in [ 'asc' ]:
l = 1
it = 0
offset = 0
while l and offset <= 10000:
if offset % 1000 == 0:
print("#{}/{}".format(offset, 20000))
r = requests.get('https://api.opensea.io/api/v1/assets?asset_contract_address={}&order_by=pk&order_direction={}&offset={}&limit=50'.format(contract_address, o, offset), headers = headers)
# r = requests.get(url)
assets = r.json()['assets']
l = len(assets)
for a in assets:
token_id = a['token_id']
for t in a['traits']:
traits_data += [[ contract_address, token_id, t['trait_type'], t['value'] ]]
data += [[ contract_address, token_id, a['image_url'] ]]
offset += 50
opensea_data = pd.DataFrame(data, columns=['contract_address','token_id','image_url']).drop_duplicates()
len(opensea_data.token_id.unique())
traits = pd.DataFrame(traits_data, columns=['contract_address','token_id','trait_type','trait_value']).drop_duplicates()
# a = set(range(opensea_data.token_id.min(), opensea_data.token_id.max()))
# b = set(opensea_data.token_id.unique())
# a.difference(b)
# len(opensea_data)
# sorted(traits.trait_type.unique())
traits = traits[(traits.trait_type != 'Token ID')]
traits['token_id'] = traits.token_id.astype(int)
traits.to_csv('./data/moonbird_traits.csv', index=False)
opensea_data.to_csv('./data/moonbird_data.csv', index=False)
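    # switch to the previously scraped MAYC traits/images for the image download below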
traits = pd.read_csv('./data/mayc_traits.csv')
opensea_data = pd.read_csv('./data/mayc_data.csv')
len(traits.token_id.unique())
opensea_data['token_id'] = opensea_data.token_id.astype(int)
opensea_data.token_id.max()
len(opensea_data)
it = 0
max_it = 9458
for row in opensea_data.iterrows():
it += 1
if it % 100 == 0:
print('#{}/{}'.format(it, len(opensea_data)))
if it < max_it:
continue
row = row[1]
urllib.request.urlretrieve(row['image_url'], './viz/www/img/{}.png'.format(row['token_id']))
def load_api_data_mayc():
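    # Shape each MAYC asset like the ethereum.nft_metadata table and dump the results
    # in batches as JSON files for the Snowflake silver.nft_metadata sink.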
results = []
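    # MAYC contract address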
contract_address = '0x60e4d786628fea6478f785a6d7e704777c86a7c6'
for o in [ 'asc', 'desc' ]:
l = 1
it = 0
offset = 0
while l and offset <= 10000:
if offset % 1000 == 0:
print("#{}/{}".format(offset, 20000))
r = requests.get('https://api.opensea.io/api/v1/assets?asset_contract_address={}&order_by=pk&order_direction={}&offset={}&limit=50'.format(contract_address, o, offset))
assets = r.json()['assets']
for a in assets:
token_metadata = {}
for t in a['traits']:
token_metadata[t['trait_type']] = t['value']
token_id = a['token_id']
d = {
'commission_rate': None
, 'contract_address': a['asset_contract']['address']
, 'contract_name': a['asset_contract']['name']
, 'created_at_block_id': 0
, 'created_at_timestamp': re.sub('T', ' ', str(a['asset_contract']['created_date']))
, 'created_at_tx_id': ''
, 'creator_address': a['creator']['address'] if a['creator'] else a['asset_contract']['address']
, 'creator_name': a['creator']['address'] if a['creator'] else a['asset_contract']['name']
, 'image_url': a['image_url']
, 'project_name': a['asset_contract']['name']
, 'token_id': token_id
, 'token_metadata': token_metadata
, 'token_metadata_uri': a['image_original_url']
, 'token_name': '{} #{}'.format(a['asset_contract']['symbol'], token_id)
}
results.append(d)
offset += 50
n = 50
r = math.ceil(len(results) / n)
blockchain = 'ethereum'
directory = 'mayc'
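    # write the assembled records in batches of n, one JSON file per batch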
for i in range(r):
newd = {
"model": {
"blockchain": blockchain,
"sinks": [
{
"destination": "{database_name}.silver.nft_metadata",
"type": "snowflake",
"unique_key": "blockchain || contract_address || token_id"
}
],
},
"results": results[(i * n):((i * n)+r)]
}
with open('./data/metadata/{}/{}.txt'.format(directory, i), 'w') as outfile:
outfile.write(json.dumps(newd))