import re
import os
import json
import time
import math
import requests
import pandas as pd
import urllib.request
import snowflake.connector
from bs4 import BeautifulSoup
from time import sleep
import cloudscraper
from theblockchainapi import SolanaAPIResource, SolanaNetwork, SearchMethod

# Get an API key pair for free here: https://dashboard.blockchainapi.com/api-keys
MY_API_KEY_ID = 'sLbjx8YFYdTtUuH'
MY_API_SECRET_KEY = 'p24pFaM9lLbWscN'
BLOCKCHAIN_API_RESOURCE = SolanaAPIResource(
    api_key_id=MY_API_KEY_ID,
    api_secret_key=MY_API_SECRET_KEY
)
os.chdir('/Users/kellenblumberg/git/nft-deal-score')

from solana_model import just_float
from utils import clean_name, clean_token_id, format_num, merge

#########################
#     Connect to DB     #
#########################
with open('snowflake.pwd', 'r') as f:
    pwd = f.readlines()[0].strip()
with open('snowflake.usr', 'r') as f:
    usr = f.readlines()[0].strip()
ctx = snowflake.connector.connect(
    user=usr,
    password=pwd,
    account='vna27887.us-east-1'
)
# query = '''
# SHOW TABLES
# '''
# sales = ctx.cursor().execute(query)
# sales = pd.DataFrame.from_records(iter(sales), columns=[x[0] for x in sales.description])
# sales = clean_colnames(sales)
# sorted(sales.name.unique())
# sorted(sales.schema_name.unique())
# sorted(sales.database_name.unique())
# sales[sales.name == 'ACTIVE_VAULT_EVENTS'][['name','schema_name']]
# tables = pd.DataFrame()
# df = sales[sales.schema_name.isin(['BRONZE_MIDGARD_2_6_9','BRONZE_MIDGARD_20211108_MIDGARD']) ]
# for row in df.iterrows():
# row = row[1]
# query = 'DESCRIBE TABLE {}.{}'.format(row['schema_name'], row['name'])
# table = ctx.cursor().execute(query)
# table = pd.DataFrame.from_records(iter(table), columns=[x[0] for x in table.description])
# table = clean_colnames(table)
# table['schema_name'] = row['schema_name']
# table['table_name'] = row['name']
# table.head()
# tables = tables.append(table)
# tables['clean_table_name'] = tables.table_name.apply(lambda x: re.sub('MIDGARD_', '', x) )
# a = tables[tables.schema_name == 'BRONZE_MIDGARD_20211108_MIDGARD'][['name','clean_table_name','type']]
# b = tables[tables.schema_name == 'BRONZE_MIDGARD_2_6_9'][['name','clean_table_name','type']]
# c = a.merge(b, on=['clean_table_name','name'], how='outer')
# c['is_hevo_fivetran'] = c.name.apply(lambda x: int(x[:7] == '__HEVO_' or x[:10] == '_FIVETRAN_') )
# c['in_old'] = (c.type_x.notnull()).astype(int)
# c['in_new'] = (c.type_y.notnull()).astype(int)
# c['in_both'] = ((c.in_old + c.in_new) == 2).astype(int)
# c.to_csv('~/Downloads/tmp.csv', index=False)
d_market = {
    'Galactic Punks': 'terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k',
    'LunaBulls': 'terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2',
    'Levana Dragon Eggs': 'terra1k0y373yxqne22pc9g7jvnr4qclpsxtafevtrpg',
    'Levana Dust': 'terra1p70x7jkqhf37qa7qm4v23g4u4g8ka4ktxudxa7',
    'Levana Meteors': 'terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v',
    'Galactic Angels': 'terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v',
}
###################################
#     Define Helper Functions     #
###################################
def clean_colnames(df):
    names = [x.lower() for x in df.columns]
    df.columns = names
    return(df)
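
# The cursor -> DataFrame -> clean_colnames pattern repeats for every query below.
# A small wrapper like this could consolidate it (hypothetical helper, not used
# elsewhere in this file; assumes ctx is the Snowflake connection opened above):
def run_query(ctx, query):
    cur = ctx.cursor().execute(query)
    df = pd.DataFrame.from_records(iter(cur), columns=[x[0] for x in cur.description])
    return clean_colnames(df)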

def add_collection_steps():
    # 1. mint_address_token_id_map
    # 2. scrape metadata
    metadata = pd.read_csv('./data/metadata.csv')
    metadata['collection'] = metadata.collection.apply(lambda x: clean_name(x))
    sorted(metadata.collection.unique())
    metadata.to_csv('./data/metadata.csv', index=False)
    metadata[metadata.collection == 'Stoned Ape Crew']
    metadata[metadata.collection == 'Stoned Ape Crew'].feature_name.unique()
    # 3. scrape howrareis
    # 4. add sales
    # 5. run model
    pass

def create_upload_file():
    cols = ['collection', 'mint_address']
    a = pd.read_csv('./data/mints-2022-06-13-2pm.csv')[cols]
    b = pd.read_csv('~/Downloads/manual_labels.csv')
    b.columns = cols
    c = pd.read_csv('~/Downloads/solscan_collections.csv')[cols]
    d = pd.read_csv('./data/tokens.csv')[cols]
    df = pd.concat([a, b, c, d]).drop_duplicates(subset=['mint_address'], keep='last')
    df.to_csv('~/Downloads/mints-2022-06-13-5pm.csv', index=False)
    tmp = pd.read_csv('~/Downloads/mints-2022-06-13-5pm.csv')
    tmp[tmp.mint_address == 'EhuVN896QVypRreAt6mcJr6eKkKunVzsgSRz7qt4oeBr']

def manual_clean():
    for c in ['pred_price', 'attributes', 'feature_values', 'model_sales', 'listings', 'coefsdf', 'tokens']:
        df = pd.read_csv('./data/{}.csv'.format(c))
        df['chain'] = 'Solana'
        if c == 'tokens':
            df['clean_token_id'] = df.token_id
        df.to_csv('./data/{}.csv'.format(c), index=False)

def pull_from_solscan():
    todo = [
        ['50a75e6d3d0b6d4a72b2f745fdba4b1c28bc774ca9629fe8e36053ae2fb396f8', 'Degen Egg']
        , ['45e3f45d695e9e8775eed480cb0f5a6a957d47dcb3ed3800e454846dca9ab7fc', 'Genopets']
        , ['a437071c6f9679e8431a072ae39421262bf289cc6ead21e38190d5b7b409e7f7', 'Shin Sengoku']
        , ['d38349f2704e8cd1c538cc48fbea4b3e2596ac8da14b62c0eb3c07aeda7ae75e', 'SolStein']
        , ['9e0593a4842ceb9ccdc510e6ffdf0d84f736bff2b58d5803c5002ace17df9fe0', 'Zillaz NFT']
        , ['895d8f01108fbb6b28c5e32027c9c98e3054241927c8e59c304fa4763c5c88ea', 'enviroPass Tier 02']
        , ['59c2a35d902f85feec4c774df503a0df2be263f763dcbcb73bce50c999fc2c78', 'The Fracture']
        , ['e8dfb059b1dfc71cf97342a1c46793bc5e154909416a93a155929da5bba44a57', 'Suteki']
        , ['271e0d68d069d80afbcb916e877831b060933b97e7b02e1cfb77e74b228b4745', 'Chillchat']
    ]
    start = time.time()
    data = []
    meta = []
    it = 0
    tot = len(todo)
    for collectionId, collection in todo:
        it += 1
        print('#{} / {}'.format(it, tot))
        # collectionId = j['data']['collectionId']
        # collection = j['data']['collection']
        offset = 0
        limit = 500
        while True:
            print(offset)
            url = 'https://api.solscan.io/collection/nft?sortBy=nameDec&collectionId={}&offset={}&limit={}'.format(collectionId, offset, limit)
            r = requests.get(url)
            js = r.json()['data']
            offset += limit
            if len(js) == 0:
                break
            for j in js:
                data += [[collectionId, collection, j['info']['mint']]]
                m = j['info']['meta']
                m['mint_address'] = j['info']['mint']
                # m['name'] = row['name']
                # m['update_authority'] = update_authority
                meta += [m]
        it += 1
        end = time.time()
        print('Finished {} / {} in {} minutes'.format(it, tot, round((end - start) / 60.0, 1)))
    df = pd.DataFrame(data, columns=['collection_id', 'collection', 'mint_address'])
    df.to_csv('~/Downloads/solscan_collections.csv', index=False)
    df[['collection', 'mint_address']].to_csv('~/Downloads/mints-2022-06-14-8am.csv', index=False)
    df.groupby('collection').mint_address.count()

def collections_from_missing_tokens():
    query = '''
    WITH base AS (
        SELECT block_timestamp::date AS date
            , s.*
            , ROW_NUMBER() OVER (ORDER BY sales_amount DESC) AS rn
        FROM solana.fact_nft_sales s
        LEFT JOIN solana.dim_labels l ON s.mint = l.address
        WHERE marketplace IN ('magic eden v1', 'magic eden v2')
            AND block_timestamp >= '2022-01-01'
            AND l.address IS NULL
            AND sales_amount >= 10
    )
    SELECT *
    FROM base
    WHERE rn % 20 = 0
    ORDER BY sales_amount DESC
    LIMIT 500
    '''
    missing = ctx.cursor().execute(query)
    missing = pd.DataFrame.from_records(iter(missing), columns=[x[0] for x in missing.description])
    missing = clean_colnames(missing)
    missing.head()
    headers = {
        'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
    }
    it = 0
    tot = len(missing)
    data = []
    for m in missing.mint.unique():
        it += 1
        if it % 10 == 0:
            print('#{} / {} ({})'.format(it, tot, len(data)))
        url = 'https://api-mainnet.magiceden.dev/v2/tokens/{}'.format(m)
        r = requests.get(url, headers=headers)
        j = r.json()
        data.append(j)
        pass
    df = pd.DataFrame(data)
    df.head()[['collection', 'mintAddress']]
    df.to_csv('~/Downloads/tmp.csv', index=False)
    need = df.groupby(['collection', 'updateAuthority']).mintAddress.count().reset_index().sort_values('mintAddress', ascending=0)
    need = need[need.mintAddress > 1].rename(columns={'updateAuthority': 'update_authority'})
    need.to_csv('~/Downloads/missing.csv', index=False)
    need.head()
    sorted(need.collection.unique())
    need['collection'] = need.collection.apply(lambda x: re.sub('_', ' ', x.title()).strip())
    need['collection'] = need.collection.apply(lambda x: re.sub('\|', '-', x).strip())
    need['collection'] = need.collection.apply(lambda x: re.sub('\)', '', x).strip())
    need['collection'] = need.collection.apply(lambda x: re.sub('\(', '', x).strip())
    need['collection'] = need.collection.apply(lambda x: re.sub('\'', '', x).strip())
    us = sorted(g[g.mintAddress > 1].updateAuthority.unique())
    tot = len(us)
    it = 0
    for u in us:
        it += 1
        print('#{} / {} ({})'.format(it, tot, len(data)))
        nfts = BLOCKCHAIN_API_RESOURCE.search_nfts(
            update_authority=u
            , update_authority_search_method=SearchMethod.EXACT_MATCH
        )
        print(u, len(nfts))
        for n in nfts:
            m = n['nft_metadata']
            data += [[m['update_authority'], m['mint'], m['data']['symbol'], m['data']['name']]]

def manual_tags():
    d = {
        'daaLrDfvcT4joui5axwR2gCkGAroruJFzyVsacU926g': 'Degenerate Ape Kindergarten'
        , 'FbfGrZ3LKuGSsayK57DetzzyN7qKeNnDuLMu5bBSocwF': 'Botheads'
    }
    a = 'FbfGrZ3LKuGSsayK57DetzzyN7qKeNnDuLMu5bBSocwF'
    c = 'Botheads'
    labels = pd.DataFrame()
    for a, c in d.items():
        query = '''
        SELECT DISTINCT instructions[1]:parsed:info:mint::string AS mint_address
        FROM solana.fact_transactions
        WHERE instructions[1]:parsed:info:mintAuthority = '{}'
        '''.format(a)
        df = ctx.cursor().execute(query)
        df = pd.DataFrame.from_records(iter(df), columns=[x[0] for x in df.description])
        df = clean_colnames(df)
        df['collection'] = c
        labels = labels.append(df)
    labels.to_csv('~/Downloads/manual_labels.csv', index=False)

def mints_from_me():
    ##################################
    #     Get All ME Collections     #
    ##################################
    headers = {
        'Authorization': 'Bearer 9c39e05c-db3c-4f3f-ac48-84099111b813'
    }

    data = []
    has_more = 1
    offset = 0
    while has_more:
        sleep(1)
        print(offset)
        url = 'https://api-mainnet.magiceden.dev/v2/collections?offset={}&limit=500'.format(offset)
        r = requests.get(url)
        j = r.json()
        data = data + j
        has_more = len(j)
        offset += 500
    df = pd.DataFrame(data)
    df.to_csv('./data/me_collections.csv', index=False)

    df = pd.read_csv('./data/me_collections.csv')
# lp_data = []
# has_more = 1
# offset = 0
# while has_more:
# sleep(1)
# print(offset)
# url = 'https://api-mainnet.magiceden.dev/v2/launchpad/collections?offset={}&limit=500'.format(offset)
# r = requests.get(url)
# j = r.json()
# lp_data = lp_data + j
# has_more = len(j)
# offset += 500
# lp_df = pd.DataFrame(lp_data)
# lp_df.to_csv('./data/me_lp_collections.csv', index=False)
# lp_df = pd.read_csv('./data/me_lp_collections.csv')

    ###########################################
    #     Get 1 Mint From Each Collection     #
    ###########################################
    it = 0
    l_data = []
    old_l_df = pd.read_csv('./data/me_mints.csv')
    seen = list(old_l_df.symbol.unique())

    df = df[-df.symbol.isin(seen)]
    df = df.sort_values('symbol')
    for row in df.iterrows():
        it += 1
        row = row[1]
        print('Listings on {}...'.format(row['symbol']))
        url = 'https://api-mainnet.magiceden.dev/v2/collections/{}/activities?offset=0&limit=1'.format(row['symbol'])
        if row['symbol'] in seen:
            print('Seen')
            continue
        try:
            r = requests.get(url, headers=headers)
            j = r.json()
        except:
            print('Re-trying in 10s')
            sleep(10)
            try:
                r = requests.get(url, headers=headers)
                j = r.json()
            except:
                print('Re-trying in 60s')
                sleep(60)
                r = requests.get(url, headers=headers)
                j = r.json()
        if len(j):
            l_data += [[row['symbol'], row['name'], j[0]['tokenMint']]]
        if it % 10 == 0:
            print('it#{}: {}'.format(it, len(l_data)))
            l_df = pd.DataFrame(l_data, columns=['symbol', 'name', 'mint_address'])
            l_df.to_csv('./data/me_mints.csv', index=False)
    l_df = pd.DataFrame(l_data, columns=['symbol', 'name', 'mint_address'])
    l_df = l_df.append(old_l_df).drop_duplicates(subset=['symbol'])
    print('Adding {} rows to me_mints'.format(len(l_df) - len(old_l_df)))
    l_df.to_csv('./data/me_mints.csv', index=False)
# it = 0
# l_data = []
# seen = [ x[0] for x in l_data ]
# print(len(seen))
# for row in df.iterrows():
# it += 1
# row = row[1]
# print('Listings on {}...'.format(row['symbol']))
# url = 'https://api-mainnet.magiceden.dev/v2/collections/{}/listings?offset=0&limit=1'.format(row['symbol'])
# if row['symbol'] in seen:
# print('Seen')
# continue
# try:
# r = requests.get(url)
# j = r.json()
# except:
# print('Re-trying in 10s')
# sleep(10)
# try:
# r = requests.get(url)
# j = r.json()
# except:
# print('Re-trying in 60s')
# sleep(60)
# r = requests.get(url)
# j = r.json()
# if len(j):
# l_data += [[ row['symbol'], row['name'], j[0]['tokenMint'] ]]
# if it % 10 == 0:
# print('it#{}: {}'.format(it, len(l_data)))
# l_df = pd.DataFrame(l_data, columns=['symbol','name','mint_address'])
# l_df.to_csv('./data/me_mints.csv', index=False)
# l_df = pd.DataFrame(l_data, columns=['symbol','name','mint_address'])
# l_df.to_csv('./data/me_mints.csv', index=False)

    # get missing collections
    query = '''
    WITH base AS (
        SELECT block_timestamp::date AS date
            , s.*
            , ROW_NUMBER() OVER (ORDER BY sales_amount DESC) AS rn
        FROM solana.fact_nft_sales s
        LEFT JOIN solana.dim_labels l ON s.mint = l.address
        WHERE marketplace IN ('magic eden v1', 'magic eden v2')
            AND block_timestamp >= '2022-01-01'
            AND block_timestamp <= '2022-05-20'
            AND l.address IS NULL
            AND sales_amount > 20
    )
    SELECT *
    FROM base
    WHERE rn % 50 = 1
    LIMIT 100
    '''
    missing = ctx.cursor().execute(query)
    missing = pd.DataFrame.from_records(iter(missing), columns=[x[0] for x in missing.description])
    missing = clean_colnames(missing)

    ######################################################
    #     Get Update Authorities For All Collections     #
    ######################################################
    l_df = pd.read_csv('./data/me_mints.csv')
    len(l_df)
    l_df.head()
    m_old = pd.read_csv('./data/me_update_authorities.csv')
    m_old['seen'] = 1
    m_data = list(m_old[['symbol', 'name', 'update_authority']].values)
    seen = [x[0] for x in m_data]
    print('Seen {} m_data'.format(len(seen)))

    l_df = l_df[-l_df.symbol.isin(seen)]
    l_df = l_df.sort_values('symbol')
    it = 0
    for row in l_df.iterrows():
        sleep(.5)
        it += 1
        row = row[1]
        symbol = row['symbol']
        print('Working on {}...'.format(symbol))
        if symbol in seen:
            print('Seen')
            continue
        url = 'https://api-mainnet.magiceden.dev/v2/tokens/{}'.format(row['mint_address'])
        try:
            r = requests.get(url, headers=headers)
            j = r.json()
        except:
            print('Re-trying in 10s')
            sleep(10)
            try:
                r = requests.get(url, headers=headers)
                j = r.json()
            except:
                print('Re-trying in 60s')
                sleep(60)
                r = requests.get(url, headers=headers)
                j = r.json()
        if 'updateAuthority' in j.keys():
            m_data += [[row['symbol'], row['name'], j['updateAuthority']]]
        if it % 10 == 0:
            print('it#{}: {}'.format(it, len(m_data)))
            m_df = pd.DataFrame(m_data, columns=['symbol', 'name', 'update_authority'])
            m_df.to_csv('./data/me_update_authorities.csv', index=False)
    m_df = pd.DataFrame(m_data, columns=['symbol', 'name', 'update_authority'])
    m_df = m_df.drop_duplicates()
    print('Adding {} rows to me_update_authorities'.format(len(m_df) - len(m_old)))
    m_df.to_csv('./data/me_update_authorities.csv', index=False)

    m_df.tail(134).head(20)
    m_df = m_df.tail(134)
    query = '''
    SELECT DISTINCT project_name, LOWER(project_name) AS lower_name
    FROM crosschain.address_labels
    WHERE blockchain = 'solana'
        AND label_subtype = 'nf_token_contract'
        AND project_name IS NOT NULL
    '''
    labels = ctx.cursor().execute(query)
    labels = pd.DataFrame.from_records(iter(labels), columns=[x[0] for x in labels.description])
    labels = clean_colnames(labels)
    labels.to_csv('~/Downloads/tmp-la.csv', index=False)

    ######################################################
    #     Get Update Authorities For All Collections     #
    ######################################################
    m_df = pd.read_csv('./data/me_update_authorities.csv')
    m_df['seen'] = (-m_df.name.isin(m_df.name.tail(134).values)).astype(int)
    m_df['lower_name'] = m_df.name.apply(lambda x: x.lower())
    seen = list(labels.lower_name.unique())
    m_df['seen'] = m_df.lower_name.isin(seen).astype(int)
    n_auth = m_df.groupby('update_authority').name.count().reset_index().rename(columns={'name': 'n_auth'})
    m_df = m_df.merge(n_auth)
    len(m_df[m_df.seen == 0])
    len(m_df[(m_df.seen == 0) & (m_df.n_auth == 1)])
    len(m_df[(m_df.seen == 0) & (m_df.n_auth > 1)])
    m_df.to_csv('~/Downloads/tmp-m_df.csv', index=False)
    len(m_df.name.unique())
    need = list(m_df[m_df.seen == 0].update_authority.unique())
    need = list(m_df[(m_df.seen == 0) & (m_df.n_auth == 1)].update_authority.unique())
    len(need)
# need = need + [
# need = [
# 'CDgbhX61QFADQAeeYKP5BQ7nnzDyMkkR3NEhYF2ETn1k' # taiyo
# , 'DC2mkgwhy56w3viNtHDjJQmc7SGu2QX785bS4aexojwX' # DAA
# , 'daaLrDfvcT4joui5axwR2gCkGAroruJFzyVsacU926g' # Degen Egg
# , 'BL5U8CoFPewr9jFcKf3kE1BhdFS1J59cwGpeZrm7ZTeP' # Skullbot
# , 'DRGNjvBvnXNiQz9dTppGk1tAsVxtJsvhEmojEfBU3ezf' # Boryoku
# , '7hYkx2CNGRB8JE7X7GefX1ak1dqe7GxgYKbpfj9moE9D' # mindfolk
# , 'CjwNEVQFKk8YzZLCvvw6sNrjxiQW8dYDSzhTph18T7g5' # jelly rascals
# , 'EcxEqUj4RNgdGJwPE3ktsM99Ea9ThPmXHUV5g37Qm4ju' # women monkey
# , 'EQSoRhbN9fEEYXKEE5Lg63Mqf17P3JydcWTvDhdMJW1N' # hydrascripts
# , '75CPiM9ywLgxhii9SQsNoA1SH3h66o5EhrYsazHR5Tqk' # hydrascripts
# , 'aury7LJUae7a92PBo35vVbP61GX8VbyxFKausvUtBrt' # aurory
# , 'ET3LWbEL6q4aUSjsX5xLyWktCwqKh6qsQE5j6TDZtZBY' # enviropass
# , '8ERR2gYrvXcJFuoNAbPRvHXtrJnAXXHgXKkVviwz9R6C' # enviroPass
# , 'GRDCbZBP1x2JxYf3rQQoPFGzF57LDPy7XtB1gEMaCqGV' # Space Robots
# , 'GenoS3ck8xbDvYEZ8RxMG3Ln2qcyoAN8CTeZuaWgAoEA' # Genopet
# , 'STEPNq2UGeGSzCyGVr2nMQAzf8xuejwqebd84wcksCK' # stepn
# , 'HcS8iaEHwUino8wKzcgC16hxHodnPCyacVYUdBaSZULP' # BASC
# , 'AvkbtawpmMSy571f71WsWEn41ATHg5iHw27LoYJdk8QA' # THUG
# , 'GH4QhJznKEHHv44AqEH5SUohkUauWyAFtu5u8zUWUKL4' # StepN Shoebox
# , 'FTQmhcD7SNBWrVxTgQMFr7xL2aA6adfAJJPBxGKU4VsZ' # Solstien
# ]
    need = m_df[m_df.update_authority.isin(need)]
# m_df[m_df.lower_name.isin(seen)]
# m_df[-m_df.lower_name.isin(seen)]
# tmp = m_df[['update_authority','collection']].drop_duplicates().groupby(['update_authority']).collection.count().reset_index().rename(columns={'collection':'n_collection'})
# tmp = tmp.sort_values('n_collection', ascending=0)
# m_df = m_df.merge(tmp)
# m_df = m_df.sort_values(by=['n_collection','update_authority','collection'], ascending=[0,0,0])
    l_df = pd.read_csv('./data/me_mints.csv')
    fix = need.merge(l_df[['name', 'mint_address']])
# len(need.name.unique())
# len(fix.name.unique())
# fix = fix.sort_values(by=['update_authority','collection'], ascending=[0,0])
# fix.head()
# seen = []
# data = []
# meta = []
# fix = fix[-(fix.name.isin(seen))]
# start = time.time()
# it = 0
# tot = len(fix)
# scraper = cloudscraper.create_scraper()
# # for each collection
# for row in fix.iterrows():
# row = row[1]
# print(row['name'])
# if row['name'] in seen:
# print('Seen')
# continue
# url = 'https://api.solscan.io/nft/detail?mint={}'.format(row['mint_address'])
# t = scraper.get(url).text
# j = json.loads(t)
# # r = requests.get(url)
# # j = r.json()
# j['data']
# if not j['success']:
# print('Error')
# print(r)
# print(j)
# sleep(1)
# continue
# update_authority = j['data']['updateAuthority']
# collectionId = j['data']['collectionId']
# collection = j['data']['collection']
# offset = 0
# limit = 500
# while True:
# print(offset)
# url = 'https://api.solscan.io/collection/nft?sortBy=nameDec&collectionId={}&offset={}&limit={}'.format(collectionId, offset, limit)
# r = requests.get(url)
# js = r.json()['data']
# offset += limit
# if len(js) == 0:
# break
# for j in js:
# data += [[ update_authority, collectionId, collection, row['symbol'], row['name'], row['collection'], j['info']['mint'] ]]
# m = j['info']['meta']
# m['mint_address'] = j['info']['mint']
# m['name'] = row['name']
# m['update_authority'] = update_authority
# meta += [ m ]
# it += 1
# end = time.time()
# print('Finished {} / {} in {} minutes'.format(it, tot, round((end - start) / 60.0, 1)))
# old = pd.read_csv('./data/nft_label_tokens.csv')
# token_df = pd.DataFrame(data, columns=['update_authority','collectionId','solscan_collection','symbol','name','collection','mint'])
# token_df = token_df.append(old).drop_duplicates()
# token_df.to_csv('./data/nft_label_tokens.csv', index=False)
# old = pd.read_csv('./data/nft_label_metadata.csv')
# meta_df = pd.DataFrame(meta)
# meta_df = meta_df.append(old).drop_duplicates()
# meta_df.to_csv('./data/nft_label_metadata.csv', index=False)
# seen = list(token_df.name.unique())
# m_df.to_csv('~/Downloads/tmp.csv', index=False)
# tmp[tmp.collection > 1]
# m_df.head()
# def f(x):
# x = re.sub('\(|\)', '', x)
# x = re.sub(' ', '_', x)
# x = re.sub('\'', '', x)
# return(x)
# m_df['collection'] = m_df.name.apply(lambda x: f(x) )
# x = 'asf (asf)'
# f(x)
# query = '''
# WITH base AS (
# SELECT *
# , ROW_NUMBER() OVER (PARTITION BY project_name ORDER BY insert_date DESC) AS rn
# FROM crosschain.address_labels
# WHERE blockchain = 'solana'
# AND label_subtype = 'nf_token_contract'
# )
# SELECT *
# FROM base
# '''
# examples = ctx.cursor().execute(query)
# examples = pd.DataFrame.from_records(iter(examples), columns=[x[0] for x in examples.description])
# examples = clean_colnames(examples)
# examples.head()
# examples[examples.address_name == 'paradisedao'].head()
# examples[examples.address == 'GUXSatf5AAFKmuQgSgn4GoGzBEhwJ9WAQRxeVt1vZvkb'].head()
# # m_df = pd.read_csv('./data/me_update_authorities.csv')
# # fix = m_df[m_df.n_collection > 1].merge(examples[[ 'address','address_name' ]].rename(columns={'address_name':'name'}) )
# fix = m_df[m_df.n_collection > 1].merge(examples[[ 'address','address_name' ]].rename(columns={'address_name':'name'}) )
# len(m_df[m_df.n_collection > 1].name.unique())
# len(fix.name.unique())
# j = list(fix.address.unique())
# with open('./data/fix_mints.json', 'w') as f:
# json.dump(j, f)
# seen = list(examples.address.unique())
# seen = []
# need = df[-df.mint_address.isin(seen)].sort_values(['collection','mint_address'])
# CDgbhX61QFADQAeeYKP5BQ7nnzDyMkkR3NEhYF2ETn1k - taiyo
# DC2mkgwhy56w3viNtHDjJQmc7SGu2QX785bS4aexojwX - DAA
# DRGNjvBvnXNiQz9dTppGk1tAsVxtJsvhEmojEfBU3ezf - Boryoku
# 7hYkx2CNGRB8JE7X7GefX1ak1dqe7GxgYKbpfj9moE9D - mindfolk
# CjwNEVQFKk8YzZLCvvw6sNrjxiQW8dYDSzhTph18T7g5 - mindfolk
    need = fix.copy().rename(columns={'name': 'collection'})
    # need = need.drop_duplicates(subset=['update_authority']).sort_values('collection').head(7).tail(1)
    need = need.drop_duplicates(subset=['update_authority']).sort_values('collection')
    need['collection'] = need.collection.apply(lambda x: re.sub('\|', '-', x).strip())
    need['collection'] = need.collection.apply(lambda x: re.sub('\)', '', x).strip())
    need['collection'] = need.collection.apply(lambda x: re.sub('\(', '', x).strip())
    need['collection'] = need.collection.apply(lambda x: re.sub('\'', '', x).strip())
    need.collection.unique()
# need = need.drop_duplicates(subset=['collection']).sort_values('collection')
    n = 0
    # 1310 - 310
    # need = need.tail(n).head(300).tail(25)
    # need = need.tail(1009).head(17)
    # need = need.tail(1009 - 17).head(17)
    # 1-285, 1310-975
    len(need)
    # print(n)
    mfiles = ['/data/mints/{}/{}_mint_accounts.json'.format(re.sub(' |-', '_', collection), update_authority) for collection, update_authority in zip(need.collection.values, need.update_authority.values)]
    seen = [x for x in mfiles if os.path.exists(x)]
    seen = []
    # for update authorities that have only 1 collection, we can just check metaboss once
    rpc = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
    # need = need.tail(400)
    it = 0
    tot = len(need)
    for row in need.iterrows():
        it += 1
        row = row[1]
        collection = row['collection']
        print('#{} / {}: {}'.format(it, tot, collection))
        # if collection in seen:
        #     continue
        update_authority = row['update_authority']
        # print('Working on {}...'.format(collection))
        collection_dir = re.sub(' |-', '_', collection)
        dir = './data/mints/{}/'.format(collection_dir)
        mfile = '{}{}_mint_accounts.json'.format(dir, update_authority)
        if not os.path.exists(dir):
            print(collection)
            os.makedirs(dir)
        # elif len(os.listdir(dir)) and os.path.exists(mfile):
        #     print('Already have {}.'.format(collection))
        #     print('Seen')
        #     continue
        seen.append(update_authority)
        os.system('metaboss -r {} -t 300 snapshot mints --update-authority {} --output {}'.format(rpc, update_authority, dir))
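    # Note (assumption based on how the files are read back below): each metaboss snapshot
    # call is expected to leave a '<update_authority>_mint_accounts.json' file in the output
    # dir containing a plain JSON list of mint addresses for that collection.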
    # write the mints to csv
    data = []
    for path in os.listdir('./data/mints/'):
        if os.path.isdir('./data/mints/' + path):
            collection = re.sub('_', ' ', path).strip()
            for fname in os.listdir('./data/mints/' + path):
                f = './data/mints/' + path + '/' + fname
                if os.path.isfile(f) and '.json' in f:
                    with open(f) as file:
                        j = json.load(file)
                    for m in j:
                        data += [[collection, m]]
    df = pd.DataFrame(data, columns=['collection', 'mint_address'])
    df.collection.unique()
    df.to_csv('./data/single_update_auth_labels.csv', index=False)
    ################################
    #     Multiple Authorities     #
    ################################
    rpc = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
    need = list(m_df[(m_df.seen == 0) & (m_df.n_auth > 1)].update_authority.unique())
    need = m_df[m_df.update_authority.isin(need)]
    fix = need.merge(l_df[['name', 'mint_address']])
    need = fix.copy().rename(columns={'name': 'collection'})
    need = need.sort_values('collection').drop_duplicates(subset=['update_authority'], keep='first')
    i = 5
    sz = 112
    t = len(need) - (sz * (i - 1)) if sz * i > len(need) else sz
    print(t)
    need = need.head(sz * i).tail(t)
    # need = need.head(150 * 2).tail(150)
    # need = need.head(150 * 3).tail(150)
    # need = need.head(150 * 4).tail(150)
    need['collection'] = need.collection.apply(lambda x: re.sub('\|', '-', x).strip())
    need['collection'] = need.collection.apply(lambda x: re.sub('\)', '', x).strip())
    need['collection'] = need.collection.apply(lambda x: re.sub('\(', '', x).strip())
    need['collection'] = need.collection.apply(lambda x: re.sub('\'', '', x).strip())
    need.collection.unique()
    it = 0
    a = []
    print(i)
    for row in need.iterrows():
        it += 1
        # if it < 20:
        #     continue
        # if it % 100 == 0:
        #     print('#{}/{}'.format(it, len(m_df)))
        print('#{} / {}'.format(it, len(need)))
        row = row[1]
        collection = row['collection']
        if collection in seen:
            continue
        update_authority = row['update_authority']
        print('Working on {}...'.format(collection))
        collection_dir = re.sub(' |-', '_', collection)
        dir = './data/mints/{}/'.format(collection_dir)
        mfile = '{}{}_mint_accounts.json'.format(dir, update_authority)
        if not os.path.exists(dir):
            print(collection)
            os.makedirs(dir)
        # elif len(os.listdir(dir)) and os.path.exists(mfile):
        #     print('Already have {}.'.format(collection))
        #     print('Seen')
        #     continue
        print('LETS GOOO')
        a.append(update_authority)
        os.system('metaboss -r {} -t 300 snapshot mints --update-authority {} --output {}'.format(rpc, update_authority, dir))
# len(need)
# len(need.drop_duplicates(subset=['mint_address']))
# len(need.collection.unique())
# tot = len(need.collection.unique())
# it = 0
# # for each collection, get all the mints from metaboss
# for c in need.collection.unique():
# it += 1
# print('#{} / {}: {}'.format(it, tot, c))
# dir = './data/fix_labels_1/{}/'.format(re.sub(' ', '_', c))
        odir = dir + 'output/'
# if not os.path.exists(dir):
# print('Making dir {}'.format(dir))
# os.makedirs(dir)
        if not os.path.exists(odir):
            print('Making dir {}'.format(odir))
            os.makedirs(odir)
# elif os.path.exists(dir+'mints.json'):
# print('Already Seen')
# continue
# ms = list(need[need.collection == c].mint_address.unique())
# with open(dir+'mints.json', 'w') as f:
# json.dump(ms, f)
        os.system('metaboss -r {} -t 300 decode mint --list-file {} --output {}'.format(rpc, mfile, odir))
    ###################################################
    #     Load All The Mints for Each Collection      #
    ###################################################
    # now that we have the mints, create a data frame with the info for each mint in each collection
    data = []
    seen = [x[1] for x in data]
    it = 0
    dirs = os.listdir('./data/mints/')
    for path in dirs:
        print(it)
        it += 1
        if os.path.isdir('./data/mints/' + path):
            collection = re.sub('_', ' ', path).strip()
            if not os.path.exists('./data/mints/' + path + '/output/'):
                continue
            fnames = os.listdir('./data/mints/' + path + '/output/')
            print(collection, len(fnames))
            for fname in fnames:
                f = './data/mints/' + path + '/output/' + fname
                if fname[:-5] in seen:
                    continue
                if os.path.isfile(f) and '.json' in f:
                    try:
                        with open(f) as file:
                            j = json.load(file)
                        data += [[collection, fname, j['name'], j['symbol'], j['uri']]]
                    except:
                        print('Error {}'.format(fname[:-5]))
    ###################################################
    #     Load All The Mints for Each Collection      #
    ###################################################
    new_mints = pd.DataFrame(data, columns=['collection', 'mint_address', 'name', 'symbol', 'uri'])
    # tmp = tmp[-(tmp.collection.isin(['Dskullys','Decimusdynamics']))]
    n = len(new_mints[(new_mints.uri.isnull()) | (new_mints.uri == '')])
    tot = len(new_mints)
    pct = round(n * 100 / tot, 1)
    print('{} ({}%) rows have no uri'.format(n, pct))
    new_mints = new_mints[new_mints.uri != '']

    # function to clean the name of each NFT (remove the number)
    def f_cn(x):
        if not x or x != x:
            return(x)
        if '#' in x[-6:]:
            x = ' '.join(re.split('#', x)[:-1]).strip()
        elif bool(re.match('.+\s+[0-9]+', x)):
            x = ' '.join(re.split(' ', x)[:-1]).strip()
        return(x)
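    # Illustrative examples (hypothetical names, not from the data) of what f_cn does:
    #   f_cn('Degen Ape #1234') -> 'Degen Ape'   (trailing '#<id>' stripped)
    #   f_cn('Okay Bear 42')    -> 'Okay Bear'   (trailing bare number stripped)
    #   f_cn(None) or a NaN     -> returned unchanged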
    new_mints['clean_name'] = new_mints.name.apply(lambda x: f_cn(x))
    # determine for each collection if we should look at collection-name-symbol, collection-symbol, or just collection to determine what collection it actually belongs to
    # this logic is because e.g. some only have a few names in the collection so we can iterate, but some have a different name for each NFT, so we assume it's the same collection for all
    a = new_mints.drop_duplicates(subset=['collection', 'clean_name', 'symbol']).groupby(['collection']).uri.count().reset_index().sort_values('uri', ascending=0)
    symbol_only = a[a.uri > 10].collection.unique()
    b = new_mints[new_mints.collection.isin(symbol_only)].drop_duplicates(subset=['collection', 'symbol']).groupby(['collection']).uri.count().reset_index().sort_values('uri', ascending=0)
    collection_only = b[b.uri > 10].collection.unique()
    # now get the info for each collection-name-symbol combo
    g1 = new_mints[(-(new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only)))].groupby(['collection', 'clean_name', 'symbol']).head(1).reset_index()
    g2 = new_mints[((new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only)))].groupby(['collection', 'symbol']).head(1).reset_index()
    g3 = new_mints[(-(new_mints.collection.isin(symbol_only))) & ((new_mints.collection.isin(collection_only)))].groupby(['collection']).head(1).reset_index()
    g = g1.append(g2).append(g3).drop_duplicates(subset=['mint_address'])
    print('{} Total: {} all, {} collection-symbol {} collection'.format(len(g), len(g1), len(g2), len(g3)))
    g.to_csv('~/Downloads/tmp-g.csv', index=False)
    # iterate over each row to get what collection they are actually in
    # by pulling data from the uri
    uri_data = []
    it = 0
    tot = len(g)
    print(tot)
    errs = []
    seen = [x['uri'] for x in uri_data]
    # for row in g.iterrows():
    for row in g[-(g.uri.isin(seen))].iterrows():
        row = row[1]
        it += 1
        if it % 100 == 0:
            uri_df = pd.DataFrame(uri_data)[['collection', 'name', 'symbol', 'row_symbol', 'row_collection', 'uri', 'row_clean_name', 'mint_address']]
            uri_df.to_csv('~/Downloads/uri_df.csv', index=False)
        print('#{} / {}: {}'.format(it, tot, row['collection']))
        try:
            r = requests.get(row['uri'])
            j = r.json()
            j['uri'] = row['uri']
            j['row_collection'] = row['collection']
            j['row_clean_name'] = row['clean_name']
            j['row_symbol'] = row['symbol']
            j['mint_address'] = row['mint_address']
            uri_data += [j]
        except:
            print('Error')
            errs.append(row)
    uri_df = pd.DataFrame(uri_data)[['collection', 'name', 'symbol', 'row_symbol', 'row_collection', 'uri', 'row_clean_name', 'mint_address']]
    uri_df.to_csv('~/Downloads/uri_df.csv', index=False)
    # for each row, parse the json from the uri
    uri_df = pd.read_csv('~/Downloads/uri_df.csv')
    def f(x, c):
        x = str(x)
        try:
            n = json.loads(re.sub("'", "\"", x))[c]
            if type(n) == list:
                return(n[0])
            return(n)
        except:
            try:
                return(json.loads(re.sub("'", "\"", x))[c])
            except:
                try:
                    return(json.loads(re.sub("'", "\"", x))[0][c])
                except:
                    try:
                        return(json.loads(re.sub("'", "\"", x))[0])
                    except:
                        return(x)
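    # Illustrative example (hypothetical strings, not from the data) of the fallback parsing:
    #   f("{'name': 'Okay Bears', 'family': 'Okay Bears'}", 'name') -> 'Okay Bears'
    #   f('not json', 'name') -> 'not json'  (every parse attempt fails, so x is returned as-is)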
    # parse the json more
    uri_df['parsed_collection'] = uri_df.collection.apply(lambda x: f(x, 'name'))
    uri_df['parsed_family'] = uri_df.collection.apply(lambda x: f(x, 'family'))
    uri_df['clean_name'] = uri_df.name.apply(lambda x: f_cn(x))
    # calculate what the collection name is
    uri_df['use_collection'] = uri_df.parsed_collection.replace('', None).fillna(uri_df.clean_name)  # .fillna(uri_df.row_symbol)
    uri_df[uri_df.use_collection == 'nan'][['use_collection', 'parsed_collection', 'parsed_family', 'clean_name', 'name', 'collection', 'symbol', 'row_symbol', 'row_collection']].head()
    uri_df[uri_df.use_collection == 'nan'][['use_collection', 'parsed_collection', 'parsed_family', 'clean_name', 'name', 'collection', 'symbol', 'row_symbol', 'row_collection']].to_csv('~/Downloads/tmp.csv', index=False)
    len(uri_df)
    # clean the collection name
    def f1(x):
        try:
            if len(x['use_collection']) == 1:
                return(x['clean_name'])
            if bool(re.match('.+\s+#[0-9]+', x['use_collection'])):
                return(' '.join(re.split('#', x['use_collection'])[:-1]).strip())
            if '{' in x['use_collection']:
                return(x['clean_name'])
            return(x['use_collection'].strip().title())
        except:
            return(x['use_collection'].strip().title())
    uri_df['tmp'] = uri_df.apply(lambda x: f1(x), 1)
    uri_df[uri_df.tmp == 'Nan'][['use_collection', 'tmp']]
    uri_df['use_collection'] = uri_df.apply(lambda x: f1(x), 1)
    sorted(uri_df.use_collection.unique())[:20]
    sorted(uri_df.use_collection.unique())[-20:]
    # clean the mint_address
    uri_df['mint_address'] = uri_df.mint_address.apply(lambda x: re.sub('.json', '', x))
    uri_df.head()
    uri_df = uri_df.fillna('None')
    for i in range(2):
        # for each collection-name-symbol combo, see how many have multiple mappings
        a = uri_df.copy().fillna('None')
        a = a[['row_collection', 'row_clean_name', 'row_symbol', 'use_collection']].drop_duplicates().groupby(['row_collection', 'row_clean_name', 'row_symbol']).use_collection.count().reset_index().rename(columns={'use_collection': 'n_1'})
        uri_df = merge(uri_df, a, ensure=True)
        # for each collection-symbol combo, see how many have multiple mappings
        a = uri_df.copy().fillna('None')
        a = a[['row_collection', 'row_symbol', 'use_collection']].drop_duplicates().groupby(['row_collection', 'row_symbol']).use_collection.count().reset_index().rename(columns={'use_collection': 'n_2'})
        uri_df = merge(uri_df, a, ensure=True)
        # for each collection combo, see how many have multiple mappings
        a = uri_df.copy().fillna('None')
        a = a[['row_collection', 'use_collection']].drop_duplicates().groupby(['row_collection']).use_collection.count().reset_index().rename(columns={'use_collection': 'n_3'})
        uri_df = merge(uri_df, a, ensure=True)
        uri_df['n'] = uri_df.apply(lambda x: x['n_3'] if x['row_collection'] in collection_only else x['n_2'] if x['row_collection'] in symbol_only else x['n_1'], 1)
        print('{} / {} ({}%) have multiple collection-name-symbol mappings'.format(len(uri_df[uri_df.n > 1]), len(uri_df), round(100.0 * len(uri_df[uri_df.n > 1]) / len(uri_df))))
        # if there are multiple, use the parsed_family instead of the use_collection
        uri_df['use_collection'] = uri_df.apply(lambda x: x['use_collection'] if x['n'] == 1 else x['parsed_family'], 1)
        del uri_df['n_1']
        del uri_df['n_2']
        del uri_df['n_3']
    # only take rows where there is a single mapping
    m = uri_df[uri_df.n == 1][['use_collection', 'row_collection', 'row_clean_name', 'row_symbol']].dropna().drop_duplicates()
    m.columns = ['use_collection', 'collection', 'clean_name', 'symbol']
    m_1 = new_mints[(-(new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only)))].fillna('').merge(m.fillna(''), how='left')
    m_2 = new_mints[((new_mints.collection.isin(symbol_only))) & (-(new_mints.collection.isin(collection_only)))][['collection', 'mint_address', 'symbol']].fillna('').merge(m.fillna(''), how='left')
    m_3 = new_mints[(-(new_mints.collection.isin(symbol_only))) & ((new_mints.collection.isin(collection_only)))][['collection', 'mint_address']].fillna('').merge(m.fillna(''), how='left')
    len(m_1) + len(m_2) + len(m_3)
    len(new_mints)
    # m = new_mints.fillna('').merge(m.fillna(''), how='left')
    m = m_1.append(m_2).append(m_3)
    print('After all this, we have {}% of the mints'.format(round(len(m) * 100 / len(new_mints))))
    len(new_mints)
    len(m)
    m['mint_address'] = m.mint_address.apply(lambda x: re.sub('.json', '', x))
    m = m[['mint_address', 'use_collection']].dropna().drop_duplicates()
    m.columns = ['mint_address', 'collection']

    m[m.collection.isnull()].head()
    m[m.collection == 'Nan'].head()
    m = m[m.collection != 'Nan']
    tmp = m.groupby('collection').mint_address.count().reset_index().sort_values('mint_address', ascending=0)
    tmp.head()
    m.to_csv('./data/mult_update_auth_labels.csv', index=False)
    ################
    #     DONE     #
    ################
    tokens = m.append(pd.read_csv('./data/tokens.csv')[['collection', 'mint_address']]).drop_duplicates(subset=['mint_address'], keep='last')
    tokens.to_csv('./data/mints-2022-06-13-2pm.csv', index=False)
    tokens.head()
    m.to_csv('./data/mints-2022-06-09.csv', index=False)
    m = pd.read_csv('./data/mints-2022-06-09.csv')
    m.groupby('collection').head(1).to_csv('~/Downloads/tmp.csv', index=False)
    len(m)
    len(m.mint_address.unique())
    m.head()
    # m = m.merge(symbol_map, how='left', on='symbol')
    # m['use_collection'] = m.use_collection_x.fillna(m.use_collection_y)
    len(new_mints)
    len(m)
    len(m[m.use_collection.isnull()])
    len(m[m.use_collection.isnull()]) / len(m)
    len(m[m.use_collection_x.isnull()]) / len(m)
    m[m.use_collection.isnull()].fillna('').drop_duplicates(subset=['collection', 'clean_name', 'symbol']).to_csv('~/Downloads/tmp-3.csv', index=False)
    m[m.use_collection.isnull()].drop_duplicates(subset=['collection']).to_csv('~/Downloads/tmp-3.csv', index=False)
    a = uri_df[(uri_df.parsed_collection.isnull()) | (uri_df.parsed_collection == '')].groupby('row_clean_name').uri.count().reset_index()
    a = uri_df[(uri_df.parsed_collection.isnull()) | (uri_df.parsed_collection == '')]
    uri_df.head()
    uri_df['row_clean_name'] = uri_df.row_clean_name.apply(lambda x: f_cn(x))
    id_map = uri_df
    a.to_csv('~/Downloads/tmp-1.csv', index=False)
    len(uri_df)
    n = uri_df.groupby()
    uri_df
    uri_df
    uri_df.head()
    uri_df[['symbol', 'collection', '']]
    uri_df.head()

    query = '''
    SELECT DISTINCT project_name
    FROM crosschain.address_labels
    WHERE blockchain = 'solana'
        AND label_subtype = 'nf_token_contract'
        AND project_name IS NOT NULL
    '''
    labels = ctx.cursor().execute(query)
    labels = pd.DataFrame.from_records(iter(labels), columns=[x[0] for x in labels.description])
    labels = clean_colnames(labels)
    seen = [x for x in m_df.collection.unique() if os.path.exists('./data/mints/{}/'.format(x)) and len(os.listdir('./data/mints/{}/'.format(x)))]
    seen = seen + [re.sub('_', ' ', f(x.lower())) for x in labels.project_name.unique()]
    m_df = m_df[m_df.symbol.notnull()]
    m_df['tmp'] = m_df.name.apply(lambda x: re.sub('_', ' ', f(x.lower())))
    m_df[m_df.symbol == 'the_last_apes']
    # m_df.to_csv('~/Downloads/tmp.csv', index=False)
    len(m_df[m_df.tmp.isin(seen)])
    [x for x in seen if not x in m_df.tmp.unique()][:11]
    m_df[m_df.symbol == 'apesquad']
    m_df[m_df.symbol == 'chimp_frens']

    url = 'https://api.solscan.io/nft/detail?mint=D5pT5HYPeQkHD6ryoHxnc2jdcUMYmjs6sS6LswbSDsuy'
    us = sorted(m_df[m_df.n_collection > 1].update_authority.unique())
    u = us[1]
    m_df[m_df.update_authority == u]
    m_df[m_df.mint == 'G3xiAFZEp49BJc8nNrDJxwTXZ34teKH7CRf5KTGakxte']
    data = []
    for u in us[:10]:
        nfts = BLOCKCHAIN_API_RESOURCE.search_nfts(
            update_authority=u
            , update_authority_search_method=SearchMethod.EXACT_MATCH
        )
        print(u, len(nfts))
        for n in nfts:
            m = n['nft_metadata']
            data += [[m['update_authority'], m['mint'], m['data']['symbol'], m['data']['name']]]
    nft_df = pd.DataFrame(data, columns=['update_authority', 'mint', 'symbol', 'name'])
    len(nft_df.update_authority.unique())
    nft_df['collection'] = nft_df.name.apply(lambda x: re.split('#', x)[0].strip())
    nft_df.groupby(['symbol', 'collection']).mint.count()
    nft_df.groupby(['symbol', 'name']).mint.count()

    print(len(seen))
    # m_df = m_df.merge(lp_df)
    len(m_df)
    it = 0
    m_df = m_df[(-m_df.tmp.isin(seen)) & (-m_df.collection.isin(seen)) & (-m_df.name.isin(seen))]
    rpc = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
    len(seen)

    for row in m_df.sort_values('collection').iterrows():
        it += 1
        # if it < 20:
        #     continue
        if it % 100 == 0:
            print('#{} / {}'.format(it, len(m_df)))
        row = row[1]
        collection = row['collection']
        if collection in seen:
            continue
        update_authority = row['update_authority']
        print('Working on {}...'.format(collection))
        collection_dir = re.sub(' ', '_', collection)
        dir = './data/mints/{}/'.format(collection_dir)
        if not os.path.exists(dir):
            os.makedirs(dir)
        elif len(os.listdir(dir)):
            # print('Already have {}.'.format(collection))
            print('Seen')
            continue
        os.system('metaboss -r {} -t 300 snapshot mints --update-authority {} --output {}'.format(rpc, update_authority, dir))
        # os.system('metaboss -r {} -t 300 derive metadata mints --update-authority {} --output {}'.format(rpc, update_authority, dir))
# fname = os.listdir(dir)
# if len(fname) == 1:
# fname = dir+fname[0]
# dir_mints = '{}mints/'.format(dir)
# if not os.path.exists(dir_mints):
# os.makedirs(dir_mints)
# os.system('metaboss -r {} -t 300 decode mint --list-file {} --output {}'.format(rpc, fname, dir_mints))
    data = []
    for path in os.listdir('./data/mints/'):
        if os.path.isdir('./data/mints/' + path):
            collection = re.sub('_', ' ', path).strip()
            for fname in os.listdir('./data/mints/' + path):
                f = './data/mints/' + path + '/' + fname
                if os.path.isfile(f) and '.json' in f:
                    with open(f) as file:
                        j = json.load(file)
                    for m in j:
                        data += [[collection, m]]
    df = pd.DataFrame(data, columns=['collection', 'mint_address'])
    df = df[df.collection != 'etc']
    # df = df.drop_duplicates(subset='mint_address')
    df = df.drop_duplicates()
    df['n'] = 1
    g = df.groupby(['mint_address']).n.sum().reset_index()
    g = g[g.n > 1]
    len(g)
    tmp_0 = g[['mint_address']].merge(df).groupby('collection').n.count().reset_index().sort_values('n', ascending=0)
    tmp_0.head(20)
    tmp_0.to_csv('~/Downloads/tmp.csv', index=False)
    tmp = g.merge(df[['collection', 'mint_address']])
    tmp = tmp.sort_values(['mint_address', 'collection'])
    tmp.collection.unique()
    len(tmp.collection.unique())
    len(df.collection.unique())
    rem = tmp.collection.unique()
    df = df[-df.collection.isin(rem)]
    df.to_csv('~/Downloads/solana_nft_tags.csv', index=False)


def mint_address_token_id_map_2():
    old = pd.read_csv('./data/mint_address_token_id_map.csv')
    old = pd.DataFrame()
    mints = pd.read_csv('./data/solana_mints.csv')
    data = []
    for collection in ['Stoned Ape Crew', 'DeGods']:
        for m in mints[mints.collection == collection].mint_address.unique():
            pass
            f = open('./data/mints/{}/{}.json'.format(collection, m))
            j = json.load(f)
            try:
                token_id = int(re.split('#', j['name'])[1])
                data += [[collection, m, token_id, j['uri']]]
            except:
                print(m)
    df = pd.DataFrame(data, columns=['collection', 'mint', 'token_id', 'uri'])
    old = old.append(df).drop_duplicates()
    print(old[old.token_id.notnull()].groupby('collection').token_id.count())
    old.to_csv('./data/mint_address_token_id_map.csv', index=False)

def mint_address_token_id_map():
    mints = pd.read_csv('./data/solana_mints.csv')
    mints[mints.collection == 'Stoned Ape Crew'][['mint_address']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
    mints[mints.collection == 'Degods'][['mint_address']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
    mints[mints.collection == 'DeGods'][['mint_address']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
    old = pd.read_csv('./data/mint_address_token_id_map.csv')
    my_file = open('./scripts/solana-rpc-app/output.txt', 'r')
    content = my_file.read()
    my_file.close()
    content_list = content.split('[')
    data = []
    for c in content_list:
        s = re.split(',', c)
        if len(s) > 1 and '#' in s[1]:
            data += [[re.split('"', s[0])[1], int(re.split('#', re.split('"', s[1])[1])[1])]]
    df = pd.DataFrame(data, columns=['mint', 'token_id']).drop_duplicates()
    df['collection'] = 'DeGods'
    df.to_csv('./data/mint_address_token_id_map.csv', index=False)

def mint_address_token_id_map():
    old = pd.read_csv('./data/mint_address_token_id_map.csv')
    l0 = len(old)
    tokens = pd.read_csv('./data/tokens.csv')[['collection', 'token_id', 'mint_address']].rename(columns={'mint_address': 'mint'}).dropna()
    tokens['uri'] = None
    tokens = tokens[-tokens.collection.isin(old.collection.unique())]
    old = old.append(tokens)
    print('Adding {} rows'.format(len(old) - l0))
    old.to_csv('./data/mint_address_token_id_map.csv', index=False)

def add_solana_sales():
    print('Adding Solana sales...')
    query = '''
    WITH mints AS (
        SELECT DISTINCT LOWER(mint) AS mint
            , token_id
            , project_name AS collection
        FROM solana.dim_nft_metadata
        WHERE mint IS NOT NULL
            AND token_id IS NOT NULL
            AND project_name IS NOT NULL
            AND project_name IN (
                'Astrals',
                'Aurory',
                'Cets on Creck',
                'Catalina Whale Mixer',
                'DeFi Pirates',
                'DeGods',
                'Degen Apes',
                'Meerkat Millionaires',
                'Okay Bears',
                'Pesky Penguins',
                'SOLGods',
                'Solana Monkey Business',
                'Stoned Ape Crew',
                'Thugbirdz'
            )
    )
    SELECT tx_id
        , s.mint
        , m.collection
        , s.block_timestamp AS sale_date
        , m.token_id
        , sales_amount AS price
    FROM solana.fact_nft_sales s
    JOIN mints m ON LOWER(m.mint) = LOWER(s.mint)
    WHERE block_timestamp >= CURRENT_DATE - 20
    '''
    sales = ctx.cursor().execute(query)
    sales = pd.DataFrame.from_records(iter(sales), columns=[x[0] for x in sales.description])
    sales = clean_colnames(sales)
    len(sales)
    len(sales.tx_id.unique())
2022-05-01 05:50:55 +00:00
2022-05-04 00:24:11 +00:00
m = sales [ [ ' tx_id ' , ' collection ' , ' token_id ' , ' sale_date ' , ' price ' ] ]
m [ ' sale_date ' ] = m . sale_date . apply ( lambda x : str ( x ) [ : 19 ] )
old = pd . read_csv ( ' ./data/sales.csv ' )
go = old . groupby ( ' collection ' ) . token_id . count ( ) . reset_index ( ) . rename ( columns = { ' token_id ' : ' n_old ' } )
l0 = len ( old )
app = old [ old . collection . isin ( m . collection . unique ( ) ) ] . append ( m )
app [ ' tmp ' ] = app . apply ( lambda x : x [ ' collection ' ] + str ( int ( float ( x [ ' token_id ' ] ) ) ) + x [ ' sale_date ' ] [ : 10 ] , 1 )
if len ( app [ app . tx_id . isnull ( ) ] ) :
app [ ' null_tx ' ] = app . tx_id . isnull ( ) . astype ( int )
app = app . sort_values ( ' null_tx ' , ascending = 1 )
app = app . drop_duplicates ( subset = [ ' tmp ' ] , keep = ' first ' )
app [ ' tx_id ' ] = app . tx_id . fillna ( app . tmp )
old = old [ - old . collection . isin ( m . collection . unique ( ) ) ]
2022-06-06 19:15:10 +00:00
app = app . drop_duplicates ( subset = [ ' tx_id ' ] )
2022-05-04 00:24:11 +00:00
old = old . append ( app )
old = old [ [ ' collection ' , ' token_id ' , ' sale_date ' , ' price ' , ' tx_id ' ] ]
2022-06-06 19:15:10 +00:00
old [ ' token_id ' ] = old . token_id . astype ( str )
# old = old.drop_duplicates(subset=['tx_id'])
# old[old.tx_id.isnull()]
# check changes
l1 = len ( old )
gn = old . groupby ( ' collection ' ) . token_id . count ( ) . reset_index ( ) . rename ( columns = { ' token_id ' : ' n_new ' } )
g = gn . merge ( go , how = ' outer ' , on = [ ' collection ' ] ) . fillna ( 0 )
g [ ' dff ' ] = g . n_new - g . n_old
g = g [ g . dff != 0 ] . sort_values ( ' dff ' , ascending = 0 )
print ( g )
print ( ' Added {} sales ' . format ( l1 - l0 ) )
old . to_csv ( ' ./data/sales.csv ' , index = False )
return ( old )
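# The execute -> DataFrame.from_records -> clean_colnames sequence above is repeated
# for every query in this file. A small helper that could wrap it (a sketch only; the
# original code does not use it, and ctx is the Snowflake connection opened at the top):
def query_to_df(query):
    cur = ctx.cursor().execute(query)
    df = pd.DataFrame.from_records(iter(cur), columns=[x[0] for x in cur.description])
    return clean_colnames(df)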
def add_eth_sales ( ) :
print ( ' Adding ETH sales... ' )
query = '''
SELECT project_name
, token_id
, block_timestamp AS sale_date
, price
, tx_id
FROM ethereum . nft_events
WHERE project_name IN (
' BoredApeYachtClub '
, ' MutantApeYachtClub '
, ' BoredApeKennelClub '
)
'''
sales = ctx . cursor ( ) . execute ( query )
sales = pd . DataFrame . from_records ( iter ( sales ) , columns = [ x [ 0 ] for x in sales . description ] )
sales = clean_colnames ( sales )
# print('Queried {} sales'.format(len(sales)))
# sales['chain'] = 'Ethereum'
sorted ( sales . project_name . unique ( ) )
sales [ ' collection ' ] = sales . project_name . apply ( lambda x : clean_name ( x ) )
# print(sales.groupby('collection').sale_date.max())
sorted ( sales . collection . unique ( ) )
# m = sales.merge(id_map, how='left', on=['mint','collection'])
# m = sales.merge(id_map, how='inner', on=['mint','collection'])
# m.sort_values('collection')
m = sales [ [ ' collection ' , ' token_id ' , ' sale_date ' , ' price ' , ' tx_id ' ] ]
old = pd . read_csv ( ' ./data/sales.csv ' )
l0 = len ( old )
old = old [ old . collection != ' Bakc ' ]
old = old [ - old . collection . isin ( sales . collection . unique ( ) ) ]
old = old . append ( m )
# print(old.groupby('collection').token_id.count())
l1 = len ( old )
print ( ' Added {} sales ' . format ( l1 - l0 ) )
old . to_csv ( ' ./data/sales.csv ' , index = False )
pass
def solana_metadata ( ) :
metadata = pd . read_csv ( ' ./data/metadata.csv ' )
metadata [ metadata . collection == ' Solana Monkey Business ' ] . feature_name . unique ( )
metadata = metadata [ metadata . collection . isin ( [ ' Aurory ' , ' Degen Apes ' , ' Galactic Punks ' , ' Pesky Penguins ' , ' Solana Monkey Business ' , ' Thugbirdz ' ] ) ]
collection = ' Solana Monkey Business '
for collection in metadata . collection . unique ( ) :
cur = metadata [ metadata . collection == collection ] . fillna ( ' None ' )
cur [ ' token_id ' ] = cur . token_id . astype ( int )
pct = cur [ [ ' token_id ' ] ] . drop_duplicates ( )
pct [ ' pct ' ] = 1
num_tokens = len ( cur . token_id . unique ( ) )
print ( ' Working on {} with {} tokens ' . format ( collection , num_tokens ) )
min ( cur . token_id )
max ( cur . token_id )
ps = pd . DataFrame ( )
for c in cur . feature_name . unique ( ) :
# if c in [ 'Attribute Count' ]:
# continue
g = cur [ cur . feature_name == c ] . groupby ( ' feature_value ' ) . token_id . count ( ) . reset_index ( )
g [ ' cur_pct ' ] = ( g . token_id / num_tokens )
g = cur [ cur . feature_name == c ] . merge ( g [ [ ' feature_value ' , ' cur_pct ' ] ] )
ps = ps . append ( g [ [ ' token_id ' , ' cur_pct ' ] ] )
pct = pct . merge ( g [ [ ' token_id ' , ' cur_pct ' ] ] )
pct [ ' pct ' ] = pct . pct * pct . cur_pct * pct . cur_pct
del pct [ ' cur_pct ' ]
ps [ ' rk ' ] = ps . groupby ( ' token_id ' ) . cur_pct . rank ( ascending = 0 )
ps [ ps . token_id == 1355 ]
mn = ps . rk . min ( )
mx = ps . rk . max ( )
ps['mult'] = ps.apply(lambda x: x['cur_pct'] ** (1 + (x['rk'] / (mx - mn))), axis=1)  # row-wise; without axis=1 apply would pass whole columns to the lambda
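# Worked example of the statistical-rarity idea above (numbers are illustrative, not
# from the data): a token whose two traits are shared by 10% and 2% of the collection
# gets pct = 0.10 * 0.02 = 0.002, so rarer trait combinations end up with the smallest
# pct values, and 'mult' further down-weights each trait by its per-token rarity rank.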
def run_queries ( ) :
for c in [ ' Levana Dragon Eggs ' , ' Levana Meteors ' , ' Levana Dust ' ] [ 1 : ] :
print ( c )
with open ( ' ./metadata/sql/ {} .txt ' . format ( c ) ) as f :
query = f . readlines ( )
metadata = ctx . cursor ( ) . execute ( ' ' . join ( query ) )
metadata = pd . DataFrame . from_records ( iter ( metadata ) , columns = [ x [ 0 ] for x in metadata . description ] )
metadata = clean_colnames ( metadata )
metadata [ ' image ' ] = metadata . image . apply ( lambda x : ' https://cloudflare-ipfs.com/ipfs/ ' + re . split ( ' / ' , x ) [ - 1 ] )
metadata [ ' collection ' ] = c
metadata [ ' chain ' ] = ' Terra '
list ( metadata . image . values [ : 2 ] ) + list ( metadata . image . values [ - 2 : ] )
metadata . to_csv ( ' ./data/metadata/ {} .csv ' . format ( c ) , index = False )
def add_terra_tokens ( ) :
# galactic punks
query = '''
SELECT msg_value : execute_msg : mint_nft : token_id AS token_id
, msg_value : execute_msg : mint_nft : extension : name AS name
, msg_value : execute_msg : mint_nft : extension : image AS image
FROM terra . msgs
WHERE msg_value : contract : : string = ' terra16wuzgsx3tz4hkqu73q5s7unxenefkkvefvewsh '
AND tx_status = ' SUCCEEDED '
AND msg_value : execute_msg : mint_nft is not null
'''
tokens = ctx . cursor ( ) . execute ( query )
tokens = pd . DataFrame . from_records ( iter ( tokens ) , columns = [ x [ 0 ] for x in tokens . description ] )
tokens = clean_colnames ( tokens )
len ( tokens )
for c in tokens . columns :
tokens [ c ] = tokens [ c ] . apply ( lambda x : re . sub ( ' " ' , ' ' , x ) )
collection = ' Levana Dragon Eggs '
collection = ' Galactic Angels '
for collection in [ ' Galactic Punks ' , ' LunaBulls ' , ' Levana Dragon Eggs ' ] :
if collection == ' Galactic Punks ' :
df = tokens
df [ ' image_url ' ] = df . image . apply ( lambda x : ' https://ipfs.io/ipfs/ ' + re . split ( ' / ' , x ) [ - 1 ] )
else :
df = pd . read_csv ( ' ./data/metadata/ {} .csv ' . format ( collection ) )
df = clean_colnames ( df ) . rename ( columns = { ' tokenid ' : ' token_id ' } )
df [ ' collection ' ] = collection
if collection == ' LunaBulls ' :
df [ ' image_url ' ] = df . ipfs_image
elif ' image ' in df . columns :
df [ ' image_url ' ] = df . image
df['clean_token_id'] = df['name'].apply(lambda x: re.split('#', x)[1]).astype(int)  # index the 'name' column explicitly; df.name can collide with DataFrame attributes
df [ ' collection ' ] = collection
df [ ' chain ' ] = ' Terra '
old = pd . read_csv ( ' ./data/tokens.csv ' )
old = old [ - ( old . collection == collection ) ]
old = old . drop_duplicates ( subset = [ ' collection ' , ' token_id ' ] , keep = ' first ' )
df [ ' market_url ' ] = df . apply ( lambda x : ' ' if x [ ' chain ' ] == ' Solana ' else ' https://randomearth.io/items/ {} _ {} ' . format ( d_market [ x [ ' collection ' ] ] , x [ ' token_id ' ] ) , 1 )
df = df [ list ( old . columns ) ]
old = old . append ( df )
print ( old . groupby ( ' collection ' ) . clean_token_id . count ( ) )
old . to_csv ( ' ./data/tokens.csv ' , index = False )
def add_terra_metadata ( ) :
query = '''
SELECT CASE
WHEN contract_address = ' terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v ' THEN ' Levana Dragons '
WHEN contract_address = ' terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2 ' THEN ' LunaBulls '
WHEN contract_address = ' terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k ' THEN ' Galactic Punks '
ELSE ' Other '
END AS collection
, token_id
, token_metadata : traits AS traits
FROM terra . nft_metadata
WHERE contract_address in (
' terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v '
, ' terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2 '
, ' terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k '
)
AND token_metadata : traits IS NOT NULL
'''
db_metadata = ctx . cursor ( ) . execute ( query )
db_metadata = pd . DataFrame . from_records ( iter ( db_metadata ) , columns = [ x [ 0 ] for x in db_metadata . description ] )
db_metadata = clean_colnames ( db_metadata )
collection = ' Levana Dragon Eggs '
collection = ' Galactic Angels '
for collection in [ ' Galactic Punks ' , ' LunaBulls ' , ' Levana Dragon Eggs ' ] :
if collection == ' Galactic Punks ' :
cur = db_metadata [ db_metadata . collection == collection ]
data = [ ]
for row in cur . iterrows ( ) :
row = row [ 1 ]
trait_names = [ re . split ( ' " ' , re . split ( ' : ' , x ) [ 0 ] ) [ 1 ] for x in re . split ( ' , ' , row [ ' traits ' ] ) ]
trait_values = [ re . split ( ' " ' , re . split ( ' : ' , x ) [ 1 ] ) [ 1 ] for x in re . split ( ' , ' , row [ ' traits ' ] ) ]
d = { ' collection ' : row [ ' collection ' ] , ' token_id ' : row [ ' token_id ' ] }
for n , v in zip ( trait_names , trait_values ) :
d [ n ] = v
data + = [ d ]
metadata = pd . DataFrame ( data )
else :
metadata = pd . read_csv ( ' ./data/metadata/ {} .csv ' . format ( collection ) )
metadata . columns = [ x . lower ( ) for x in metadata . columns ]
if ' Levana ' in collection :
metadata = metadata . rename ( columns = { ' rank ' : ' collection_rank ' } )
metadata = clean_colnames ( metadata ) . rename ( columns = { ' tokenid ' : ' token_id ' } )
cols = [ c for c in metadata . columns if not c in [ ' block_timestamp ' , ' block_id ' , ' tx_id ' , ' collection ' , ' chain ' , ' name ' , ' image ' , ' token_name ' ] ]
metadata = metadata [ cols ]
metadata [ ' collection ' ] = collection
none_col = ' None '
metadata = metadata . fillna ( none_col )
for c in [ x for x in metadata . columns if type ( metadata [ x ] . values [ 0 ] ) == str ] :
metadata [ c ] = metadata [ c ] . apply ( lambda x : re . sub ( ' " ' , ' ' , x ) )
if collection == ' Galactic Punks ' :
glitches = [ ' messy pink ' , ' messy blue ' , ' ponytail red ' , ' messy brown ' , ' neat brown ' , ' ponytail black ' , ' neat red ' , ' messy blonde ' , ' neat black ' , ' neat blonde ' , ' ponytail blonde ' ]
metadata [ ' glitch_trait ' ] = metadata . hair . apply ( lambda x : ' Yes ' if x in glitches else ' No ' )
metadata [ ' pct ' ] = 1
metadata [ ' attribute_count ' ] = 0
l = len ( metadata )
incl_att_count = not collection in [ ' Levana Dragon Eggs ' ]
for c in list ( metadata . columns ) + [ ' attribute_count ' ] :
if c in [ ' token_id ' , ' collection ' , ' pct ' , ' levana_rank ' , ' meteor_id ' ] :
continue
if c == ' attribute_count ' and not incl_att_count :
continue
metadata [ c ] = metadata [ c ] . apply ( lambda x : re . sub ( ' _ ' , ' ' , x ) . title ( ) if x == x and type ( x ) == str else x )
g = metadata . groupby ( c ) . token_id . count ( ) . reset_index ( )
g [ ' cur_pct ' ] = g . token_id / l
metadata = metadata . merge ( g [ [ c , ' cur_pct ' ] ] )
metadata [ ' pct ' ] = metadata . pct * metadata . cur_pct
if incl_att_count and not c in [ ' attribute_count ' , ' glitch_trait ' ] :
metadata [ ' attribute_count ' ] = metadata . attribute_count + metadata [ c ] . apply ( lambda x : int ( x != none_col ) )
del metadata [ ' cur_pct ' ]
# cur = metadata[[ 'collection','token_id', c ]].rename(columns={c: 'feature_value'})
# cur['feature_name'] = c
# m = m.append(cur)
if incl_att_count :
metadata . groupby ( ' attribute_count ' ) . token_id . count ( ) . reset_index ( )
# metadata.groupby(['rarity','attribute_count']).token_id.count().reset_index()
# metadata.groupby('backgrounds').token_id.count().reset_index().token_id.sum()
# metadata.sort_values('pct_rank')
metadata . sort_values ( ' pct ' )
metadata [ ' nft_rank ' ] = metadata . pct . rank ( )
# metadata['rarity_score'] = metadata.pct.apply(lambda x: 1.0 / (x**0.07) )
# mn = metadata.rarity_score.min()
# mx = metadata.rarity_score.max()
# metadata = metadata.sort_values('token_id')
# metadata['rarity_score'] = metadata.rarity_score.apply(lambda x: ((x - mn) * 99 / (mx - mn)) + 1)
# metadata['rarity_score_rank'] = metadata.rarity_score.rank(ascending=0, method='first').astype(int)
# metadata.sort_values('rarity_score', ascending=0).head(20)[['token_id','collection_rank','rarity_score','rarity_score_rank']]
# metadata.sort_values('rarity_score', ascending=0).tail(20)[['token_id','collection_rank','rarity_score']]
# len(metadata[metadata.rarity_score<=2.4]) / len(metadata)
# metadata[metadata.token_id==6157].sort_values('rarity_score', ascending=0).tail(20)[['token_id','collection_rank','rarity_score','rank']]
# metadata[metadata['rank']>=3000].groupby('weight').token_id.count()
# metadata.rarity_score.max()
# metadata.rarity_score.min()
# metadata.sort_values('rank')[['rank','pct','rarity_score']]
m = pd . DataFrame ( )
for c in metadata . columns :
if c in [ ' token_id ' , ' collection ' ] :
continue
cur = metadata [ [ ' token_id ' , ' collection ' , c ] ] . rename ( columns = { c : ' feature_value ' } )
cur [ ' feature_name ' ] = c
m = m . append ( cur )
m [ ' chain ' ] = ' Terra '
m . groupby ( ' feature_name ' ) . feature_value . count ( )
if collection == ' Levana Dragon Eggs ' :
add = m [ m . feature_name == ' collection_rank ' ]
add [ ' feature_name ' ] = ' transformed_collection_rank '
mx = add . feature_value . max ( )
mn = add . feature_value . min ( )
add [ ' feature_value ' ] = add . feature_value . apply ( lambda x : 1.42 * * ( 1.42 * * ( 8 * ( x - mn ) / ( mx - mn ) ) ) + 0.13 )
# add['tmp'] = add.feature_value.rank() * 10 / len(add)
# add['tmp'] = add.tmp.astype(int)
# add.groupby('tmp').feature_value.mean()
m = m . append ( add )
add = m [ m . feature_name == ' collection_rank ' ]
add [ ' feature_name ' ] = ' collection_rank_group '
add [ ' feature_value ' ] = add . feature_value . apply ( lambda x : int ( x / 1000 ) )
m = m . append ( add )
g = m . groupby ( ' feature_value ' ) . feature_name . count ( ) . reset_index ( ) . sort_values ( ' feature_name ' ) . tail ( 50 )
old = pd . read_csv ( ' ./data/metadata.csv ' )
m [ ' feature_name ' ] = m . feature_name . apply ( lambda x : re . sub ( ' _ ' , ' ' , x ) . title ( ) )
m [ ' feature_value ' ] = m . feature_value . apply ( lambda x : re . sub ( ' _ ' , ' ' , x ) . title ( ) if type ( x ) == str else x )
l0 = len ( old )
if not ' chain ' in old . columns :
old [ ' chain ' ] = old . collection . apply ( lambda x : ' Terra ' if x in [ ' Galactic Punks ' , ' LunaBulls ' ] else ' Solana ' )
old = old [ - old . collection . isin ( m . collection . unique ( ) ) ]
old = old . append ( m )
old = old . drop_duplicates ( subset = [ ' collection ' , ' token_id ' , ' feature_name ' ] )
old = old [ - ( old . feature_name . isin ( [ ' last_sale ' ] ) ) ]
# print(old.groupby(['chain','collection']).token_id.count())
print ( old [ [ ' chain ' , ' collection ' , ' token_id ' ] ] . drop_duplicates ( ) . groupby ( [ ' chain ' , ' collection ' ] ) . token_id . count ( ) )
l1 = len ( old )
print ( ' Adding {} rows ' . format ( l1 - l0 ) )
old . to_csv ( ' ./data/metadata.csv ' , index = False )
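# The metadata CSV written above is long-format, one row per (token, trait). An
# illustrative (made-up) slice of the shape:
#   collection       token_id  feature_name      feature_value  chain
#   Galactic Punks   1234      Hair              Messy Blue     Terra
#   Galactic Punks   1234      Attribute Count   5              Terra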
def add_terra_sales ( ) :
print ( ' Adding Terra sales ' )
query = '''
WITH
RE_events AS (
SELECT
block_timestamp ,
tx_id ,
event_attributes
FROM
terra . msg_events
WHERE event_attributes : action = ' execute_orders '
AND event_type = ' from_contract '
AND tx_status = ' SUCCEEDED '
AND block_timestamp > = CURRENT_DATE - 3
) ,
RE_takers AS (
SELECT DISTINCT
tx_id ,
msg_value : sender as taker
FROM
terra . msgs
WHERE
tx_id IN ( SELECT DISTINCT tx_id FROM RE_events )
AND block_timestamp > = CURRENT_DATE - 3
) ,
allSales AS
(
SELECT
block_timestamp ,
tx_id ,
platform ,
nft_from ,
nft_to ,
nft_address ,
CASE nft_address
WHEN ' terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2 ' THEN ' LunaBulls '
WHEN ' terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k ' THEN ' Galactic Punks '
WHEN ' terra1vhuyuwwr4rkdpez5f5lmuqavut28h5dt29rpn6 ' THEN ' Levana Dragons '
WHEN ' terra1p70x7jkqhf37qa7qm4v23g4u4g8ka4ktxudxa7 ' THEN ' Levana Dust '
WHEN ' terra1k0y373yxqne22pc9g7jvnr4qclpsxtafevtrpg ' THEN ' Levana Eggs '
WHEN ' terra14gfnxnwl0yz6njzet4n33erq5n70wt79nm24el ' THEN ' Levana Loot '
WHEN ' terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v ' THEN ' Levana Meteors '
WHEN ' terra13nccm82km0ttah37hkygnvz67hnvkdass24yzv ' THEN ' Galactic Angels '
ELSE nft_address END
as nft ,
amount ,
denom ,
tokenid
FROM (
SELECT
block_timestamp ,
tx_id ,
' Random Earth ' as platform ,
action ,
IFF ( action = ' SELL ' , maker , taker ) as nft_from ,
IFF ( action = ' ACCEPT BID ' , maker , taker ) as nft_to ,
nft_address ,
amount ,
denom ,
tokenid
FROM (
SELECT
block_timestamp ,
e . tx_id ,
action ,
maker ,
taker ,
nft_address ,
amount ,
denom ,
tokenid
FROM (
SELECT
block_timestamp ,
tx_id ,
IFF ( event_attributes : order : order : maker_asset : info : nft is not null , ' SELL ' , ' ACCEPT BID ' ) as action ,
LISTAGG ( CHR ( F . VALUE ) ) WITHIN GROUP ( ORDER BY F . INDEX ) as maker ,
IFF ( event_attributes : order : order : maker_asset : info : nft is not null , event_attributes : order : order : maker_asset : info : nft : contract_addr , event_attributes : order : order : taker_asset : info : nft : contract_addr ) : : string as nft_address ,
IFF ( event_attributes : order : order : maker_asset : info : nft is not null , event_attributes : order : order : taker_asset : amount , event_attributes : order : order : maker_asset : amount ) / 1e6 as amount ,
IFF ( event_attributes : order : order : maker_asset : info : nft is not null , event_attributes : order : order : taker_asset : info : native_token : denom , event_attributes : order : order : maker_asset : info : native_token : denom ) : : string as denom ,
IFF ( event_attributes : order : order : maker_asset : info : nft is not null , event_attributes : order : order : maker_asset : info : nft : token_id , event_attributes : order : order : taker_asset : info : nft : token_id ) as tokenid
FROM
RE_events e ,
LATERAL FLATTEN ( input = > event_attributes : order : order : maker ) F
GROUP BY
block_timestamp ,
tx_id ,
nft_address ,
amount ,
denom ,
tokenid ,
action
) e
JOIN RE_takers t
ON e . tx_id = t . tx_id
)
UNION
SELECT
block_timestamp ,
tx_id ,
' Knowhere ' as platform ,
MAX ( IFF ( event_attributes : bid_amount is not null , ' SELL ' , ' AUCTION ' ) ) as action ,
MAX ( IFF ( event_type = ' coin_received ' , COALESCE ( event_attributes : " 2_receiver " , event_attributes : " 1_receiver " ) , ' ' ) ) as nft_from ,
MAX ( IFF ( event_attributes : " 0_action " = ' settle ' AND event_attributes : " 1_action " = ' transfer_nft ' , event_attributes : recipient , ' ' ) ) as nft_to ,
MAX ( IFF ( event_attributes : " 1_action " is not null , event_attributes : " 1_contract_address " , ' ' ) ) : : string as nft_address ,
MAX ( IFF ( event_type = ' coin_received ' , COALESCE ( NVL ( event_attributes : " 0_amount " [ 0 ] : amount , 0 ) + NVL ( event_attributes : " 1_amount " [ 0 ] : amount , 0 ) + NVL ( event_attributes : " 2_amount " [ 0 ] : amount , 0 ) , event_attributes : amount [ 0 ] : amount ) , 0 ) ) / 1e6 as amount ,
MAX ( IFF ( event_type = ' coin_received ' , COALESCE ( event_attributes : " 0_amount " [ 0 ] : denom , event_attributes : amount [ 0 ] : denom ) , ' ' ) ) : : string as denom ,
MAX ( IFF ( event_type = ' wasm ' , event_attributes : token_id , 0 ) ) as tokenid
FROM
terra . msg_events
WHERE
tx_status = ' SUCCEEDED '
AND block_timestamp > = CURRENT_DATE - 3
AND tx_id IN (
SELECT DISTINCT
tx_id
FROM terra . msgs
WHERE
msg_value : execute_msg : settle : auction_id is not null
AND tx_status = ' SUCCEEDED '
AND msg_value : contract = ' terra12v8vrgntasf37xpj282szqpdyad7dgmkgnq60j '
AND block_timestamp > = CURRENT_DATE - 3
)
GROUP BY
block_timestamp ,
tx_id
UNION
SELECT
block_timestamp ,
tx_id ,
' Luart ' as platform ,
UPPER ( event_attributes : order_type ) as action ,
event_attributes : order_creator as nft_from , -- for sells, no info about other types yet
event_attributes : recipient as nft_to ,
event_attributes : nft_contract_address as nft_address ,
event_attributes : price / 1e6 as amount ,
event_attributes : denom : : string as denom ,
event_attributes : " 0_token_id " as tokenid
FROM terra . msg_events
WHERE
event_type = ' from_contract '
AND event_attributes : action = ' transfer_nft '
AND event_attributes : method = ' execute_order '
AND event_attributes : " 0_contract_address " = ' terra1fj44gmt0rtphu623zxge7u3t85qy0jg6p5ucnk '
AND block_timestamp > = CURRENT_DATE - 3
)
WHERE nft_address IN (
' terra13nccm82km0ttah37hkygnvz67hnvkdass24yzv ' ,
' terra1trn7mhgc9e2wfkm5mhr65p3eu7a2lc526uwny2 ' ,
' terra103z9cnqm8psy0nyxqtugg6m7xnwvlkqdzm4s4k ' ,
' terra1vhuyuwwr4rkdpez5f5lmuqavut28h5dt29rpn6 ' ,
' terra1p70x7jkqhf37qa7qm4v23g4u4g8ka4ktxudxa7 ' ,
' terra1k0y373yxqne22pc9g7jvnr4qclpsxtafevtrpg ' ,
' terra14gfnxnwl0yz6njzet4n33erq5n70wt79nm24el ' ,
' terra1chrdxaef0y2feynkpq63mve0sqeg09acjnp55v '
)
)
select * from allsales
'''
sales = ctx . cursor ( ) . execute ( query )
sales = pd . DataFrame . from_records ( iter ( sales ) , columns = [ x [ 0 ] for x in sales . description ] )
sales = clean_colnames ( sales )
# tokens = pd.read_csv('./data/tokens.csv')
# tokens['tmp'] = tokens.token_id.apply(lambda x: (str(x)[:5]))
# tokens[tokens.collection == 'Galactic Punks'].to_csv('~/Downloads/tmp.csv', index=False)
sales . tokenid . values [ : 4 ]
sales [ ' tokenid ' ] = sales . tokenid . apply ( lambda x : str ( int ( float ( x ) ) ) )
# tokens['token_id'] = tokens.token_id.astype(str)
# s = sales[sales.nft == 'Galactic Punks']
# t = tokens[tokens.collection == 'Galactic Punks'].token_id.values
# s[s.tokenid.isin(t)]
sales = sales . rename ( columns = {
' nft ' : ' collection '
, ' block_timestamp ' : ' sale_date '
, ' amount ' : ' price '
, ' tokenid ' : ' token_id '
} )
sales = clean_token_id ( sales )
assert ( len ( sales [ sales . price . isnull ( ) ] ) == 0 )
old = pd . read_csv ( ' ./data/sales.csv ' )
go = old . groupby ( ' collection ' ) . token_id . count ( ) . reset_index ( ) . rename ( columns = { ' token_id ' : ' n_old ' } )
l0 = len ( old )
app = old [ ( old . collection . isin ( sales . collection . unique ( ) ) ) ] . append ( sales )
assert ( len ( app [ app . tx_id . isnull ( ) ] ) == 0 )
app = app . drop_duplicates ( ' tx_id ' )
old = old [ - ( old . collection . isin ( sales . collection . unique ( ) ) ) ]
old = old . append ( app )
old = old [ [ ' collection ' , ' token_id ' , ' sale_date ' , ' price ' , ' tx_id ' ] ]
# check changes
l1 = len ( old )
gn = old . groupby ( ' collection ' ) . token_id . count ( ) . reset_index ( ) . rename ( columns = { ' token_id ' : ' n_new ' } )
g = gn . merge ( go , how = ' outer ' , on = [ ' collection ' ] ) . fillna ( 0 )
g [ ' dff ' ] = g . n_new - g . n_old
g = g [ g . dff != 0 ] . sort_values ( ' dff ' , ascending = 0 )
print ( g )
print ( ' Added {} sales ' . format ( l1 - l0 ) )
old . to_csv ( ' ./data/sales.csv ' , index = False )
return ( old )
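# The Terra queries above only look back 3 days (block_timestamp >= CURRENT_DATE - 3);
# de-duplicating on tx_id before re-writing sales.csv is what keeps repeated
# incremental runs from double-counting the overlap.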
def rarity_tools ( browser ) :
data = [ ]
collection = ' boredapeyachtclub '
collection = ' mutant-ape-yacht-club '
collection = ' bored-ape-kennel-club '
url = ' https://rarity.tools/ {} ' . format ( collection )
browser . get ( url )
for i in range ( 201 ) :
print ( i , len ( data ) )
sleep ( 0.1 )
soup = BeautifulSoup ( browser . page_source )
for div in soup . find_all ( ' div ' , class_ = ' bgCard ' ) :
rk = div . find_all ( ' div ' , class_ = ' font-extrabold ' )
img = div . find_all ( ' img ' )
if len ( rk ) and len ( img ) :
# try:
rk = int ( just_float ( rk [ 0 ] . text ) )
img_url = re . split ( ' \ ? ' , img [ 0 ] . attrs [ ' src ' ] ) [ 0 ]
token_id = int ( re . split ( ' /| \ . ' , img_url ) [ 6 ] )
data + = [ [ collection , token_id , img_url , rk ] ]
# except:
# pass
# bs = browser.find_elements_by_class_name('smallBtn')
browser . find_elements_by_class_name ( ' smallBtn ' ) [ 4 + int ( i > 0 ) ] . click ( )
sleep ( 0.1 )
# for i in range(len(bs)):
# print(i, browser.find_elements_by_class_name('smallBtn')[i].text)
df = pd . DataFrame ( data , columns = [ ' collection ' , ' token_id ' , ' image_url ' , ' nft_rank ' ] ) . drop_duplicates ( )
len ( df )
df [ ' chain ' ] = ' Ethereum '
df [ ' clean_token_id ' ] = df . token_id
df [ ' collection ' ] = df . collection . apply ( lambda x : clean_name ( x ) )
len ( df )
old = pd . read_csv ( ' ./data/tokens.csv ' )
l0 = len ( old )
old = old [ - old . collection . isin ( df . collection . unique ( ) ) ]
old = old . append ( df )
l1 = len ( old )
print ( ' Added {} rows ' . format ( format_num ( l1 - l0 ) ) )
old . tail ( )
old [ old . chain == ' Ethereum ' ] . collection . unique ( )
old . to_csv ( ' ./data/tokens.csv ' , index = False )
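# rarity_tools() expects an already-open Selenium WebDriver. A minimal usage sketch
# (assumes chromedriver is installed; not part of the original pipeline):
# from selenium import webdriver
# browser = webdriver.Chrome()
# rarity_tools(browser)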
def eth_metadata_api ( ) :
old = pd . read_csv ( ' ./data/metadata.csv ' )
collection = ' BAYC '
collection = ' MAYC '
seen = [ ]
seen = sorted ( old [ old . collection == collection ] . token_id . unique ( ) )
a_data = [ ]
t_data = [ ]
errs = [ ]
# for i in range(10000):
it = 0
for i in ids[21:]:  # NOTE: 'ids' is built near the end of this function (tokens whose image_url still points at nft-media); this loop assumes a prior pass populated it
sleep ( .1 )
it + = 1
if it % 1 == 0 :
print ( i , len ( t_data ) , len ( a_data ) , len ( errs ) )
if i in seen :
continue
# try:
url = ' https://boredapeyachtclub.com/api/mutants/ {} ' . format ( i )
try :
j = requests . get ( url ) . json ( )
t_data + = [ [ i , j [ ' image ' ] ] ]
for a in j [ ' attributes ' ] :
a_data + = [ [ i , a [ ' trait_type ' ] , a [ ' value ' ] ] ]
except :
print ( ' Re-trying once... ' )
sleep ( 30 )
try :
j = requests . get ( url ) . json ( )
t_data + = [ [ i , j [ ' image ' ] ] ]
for a in j [ ' attributes ' ] :
a_data + = [ [ i , a [ ' trait_type ' ] , a [ ' value ' ] ] ]
except :
print ( ' Re-trying twice... ' )
sleep ( 30 )
j = requests . get ( url ) . json ( )
t_data + = [ [ i , j [ ' image ' ] ] ]
for a in j [ ' attributes ' ] :
a_data + = [ [ i , a [ ' trait_type ' ] , a [ ' value ' ] ] ]
# errs.append(i)
new_mdf = pd . DataFrame ( a_data , columns = [ ' token_id ' , ' feature_name ' , ' feature_value ' ] )
new_mdf [ ' collection ' ] = ' MAYC '
new_mdf [ ' chain ' ] = ' Ethereum '
old = old . append ( new_mdf )
old . to_csv ( ' ./data/metadata.csv ' , index = False )
new_tdf = pd . DataFrame ( t_data , columns = [ ' token_id ' , ' image_url ' ] )
new_tdf [ ' collection ' ] = ' MAYC '
m = pd . read_csv ( ' ./data/metadata.csv ' )
old = pd . read_csv ( ' ./data/tokens.csv ' )
l0 = len ( old )
old = old . merge ( new_tdf , on = [ ' collection ' , ' token_id ' ] , how = ' left ' )
old [ old . image_url_y . notnull ( ) ]
old [ old . image_url_y . notnull ( ) ] [ [ ' image_url_x ' , ' image_url_y ' ] ]
old [ ' image_url ' ] = old . image_url_y . fillna ( old . image_url_x )
del old [ ' image_url_x ' ]
del old [ ' image_url_y ' ]
l1 = len ( old )
print ( ' Adding {} rows ' . format ( l1 - l0 ) )
old . to_csv ( ' ./data/tokens.csv ' , index = False )
tmp = old [ old . collection == ' MAYC ' ]
tmp [ ' tmp ' ] = tmp . image_url . apply ( lambda x : int ( ' nft-media ' in x ) )
tmp [ tmp . tmp == 1 ] . merge ( m [ [ ' token_id ' ] ] . drop_duplicates ( ) ) [ [ ' token_id ' ] ] . drop_duplicates ( )
ids = tmp [ tmp . tmp == 1 ] . merge ( m [ [ ' token_id ' ] ] . drop_duplicates ( ) ) . token_id . unique ( )
a = old [ old . collection == ' MAYC ' ] . token_id . unique ( )
b = new_tdf . token_id . unique ( )
[ x for x in b if not x in a ]
new_mdf [ ' collection ' ] = ' MAYC '
new_mdf [ ' chain ' ] = ' Ethereum '
old = old . append ( new_mdf )
old . to_csv ( ' ./data/metadata.csv ' , index = False )
collection = ' BAYC '
data = [ ]
for i in range ( 0 , 1000 ) :
if i % 100 == 1 :
print ( i , len ( data ) )
url = ' https://ipfs.io/ipfs/QmeSjSinHpPnmXmspMjwiXyN6zS4E9zccariGR3jxcaWtq/ {} ' . format ( i )
# try:
j = requests . get ( url , verify = False , timeout = 1 ) . json ( )
data + = [ [ collection , i , j [ ' image ' ] ] ]
# except:
# print(i)
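# The nested try/except + sleep(30) retry pattern used above could be factored into a
# small helper; a sketch under that assumption (get_json is an invented name, not a
# function the original code defines or uses):
def get_json(url, retries=3, wait=30):
    for attempt in range(retries):
        try:
            return requests.get(url, timeout=10).json()
        except Exception:
            if attempt == retries - 1:
                raise
            sleep(wait)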
def eth_metadata ( ) :
query = '''
SELECT contract_name
, token_id
, token_metadata : Background AS background
, token_metadata : Clothes AS clother
, token_metadata : Earring AS earring
, token_metadata : Eyes AS eyes
, token_metadata : Fur AS fur
, token_metadata : Hat AS hat
, token_metadata : Mouth AS mouth
, image_url
FROM ethereum . nft_metadata
WHERE contract_name IN ( ' MutantApeYachtClub ' , ' bayc ' )
'''
metadata = ctx . cursor ( ) . execute ( query )
metadata = pd . DataFrame . from_records ( iter ( metadata ) , columns = [ x [ 0 ] for x in metadata . description ] )
metadata = clean_colnames ( metadata )
metadata [ ' collection ' ] = metadata . contract_name . apply ( lambda x : x [ 0 ] . upper ( ) + ' AYC ' )
metadata [ ' image_url ' ] = metadata . image_url . apply ( lambda x : ' https://ipfs.io/ipfs/ {} ' . format ( re . split ( ' / ' , x ) [ - 1 ] ) if ' ipfs ' in x else x )
# metadata['image_url'] = metadata.tmp
old = pd . read_csv ( ' ./data/tokens.csv ' )
old = old . merge ( metadata [ [ ' collection ' , ' token_id ' , ' image_url ' ] ] , how = ' left ' , on = [ ' collection ' , ' token_id ' ] )
old [ old . image_url_y . notnull ( ) ]
old [ ' image_url ' ] = old . image_url_y . fillna ( old . image_url_x )
del old [ ' image_url_x ' ]
del old [ ' image_url_y ' ]
del metadata [ ' image_url ' ]
old . to_csv ( ' ./data/tokens.csv ' , index = False )
ndf = pd . DataFrame ( )
e = [ ' contract_name ' , ' token_id ' , ' collection ' ]
for c in [ c for c in metadata . columns if not c in e ] :
cur = metadata [ [ ' collection ' , ' token_id ' , c ] ]
cur . columns = [ ' collection ' , ' token_id ' , ' feature_value ' ]
cur [ ' feature_name ' ] = c . title ( )
cur . feature_value . unique ( )
cur [ ' feature_value ' ] = cur . feature_value . apply ( lambda x : x [ 1 : - 1 ] if x else ' None ' )
ndf = ndf . append ( cur )
ndf = ndf . drop_duplicates ( )
ndf [ ' chain ' ] = ' Ethereum '
g = ndf . groupby ( [ ' collection ' , ' feature_name ' , ' feature_value ' ] ) . token_id . count ( ) . reset_index ( )
old = pd . read_csv ( ' ./data/metadata.csv ' )
old . head ( )
l0 = len ( old )
old = old . append ( ndf )
l1 = len ( old )
print ( ' Adding {} rows ' . format ( l1 - l0 ) )
old . to_csv ( ' ./data/metadata.csv ' , index = False )
t_data = [ ]
a_data = [ ]
for i in range ( 10000 ) :
if i % 100 == 1 :
print ( i , len ( t_data ) , len ( a_data ) )
token_id = i + 1
url = ' https://us-central1-bayc-metadata.cloudfunctions.net/api/tokens/ {} ' . format ( i )
j = requests . get ( url ) . json ( )
t_data + = [ [ token_id , j [ ' image ' ] ] ]
for a in j [ ' attributes ' ] :
a_data + = [ [ token_id , a [ ' trait_type ' ] , a [ ' value ' ] ] ]
df = pd.DataFrame(t_data, columns=['token_id', 'image_url'])  # t_data rows are [token_id, image]
######################################
# Grab Data From OpenSea API #
######################################
def load_api_data ( ) :
headers = {
' Content-Type ' : ' application/json '
, ' X-API-KEY ' : ' 2b7cbb0ebecb468bba431aefb8dbbebe '
}
data = [ ]
traits_data = [ ]
contract_address = ' 0x23581767a106ae21c074b2276d25e5c3e136a68b '
# url = 'https://api.opensea.io/api/v1/assets?asset_contract_address=0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d&limit=20&token_ids=8179'
# for o in [ 'asc', 'desc' ]:
for o in [ ' asc ' ] :
l = 1
it = 0
offset = 0
while l and offset < = 10000 :
if offset % 1000 == 0 :
print ( " # {} / {} " . format ( offset , 20000 ) )
r = requests . get ( ' https://api.opensea.io/api/v1/assets?asset_contract_address= {} &order_by=pk&order_direction= {} &offset= {} &limit=50 ' . format ( contract_address , o , offset ) , headers = headers )
# r = requests.get(url)
assets = r . json ( ) [ ' assets ' ]
l = len ( assets )
for a in assets :
token_id = a [ ' token_id ' ]
for t in a [ ' traits ' ] :
traits_data + = [ [ contract_address , token_id , t [ ' trait_type ' ] , t [ ' value ' ] ] ]
data + = [ [ contract_address , token_id , a [ ' image_url ' ] ] ]
offset + = 50
opensea_data = pd . DataFrame ( data , columns = [ ' contract_address ' , ' token_id ' , ' image_url ' ] ) . drop_duplicates ( )
len ( opensea_data . token_id . unique ( ) )
traits = pd . DataFrame ( traits_data , columns = [ ' contract_address ' , ' token_id ' , ' trait_type ' , ' trait_value ' ] ) . drop_duplicates ( )
# a = set(range(opensea_data.token_id.min(), opensea_data.token_id.max()))
# b = set(opensea_data.token_id.unique())
# a.difference(b)
# len(opensea_data)
# sorted(traits.trait_type.unique())
traits = traits [ ( traits . trait_type != ' Token ID ' ) ]
traits [ ' token_id ' ] = traits . token_id . astype ( int )
traits . to_csv ( ' ./data/moonbird_traits.csv ' , index = False )
opensea_data . to_csv ( ' ./data/moonbird_data.csv ' , index = False )
traits = pd . read_csv ( ' ./data/mayc_traits.csv ' )
opensea_data = pd . read_csv ( ' ./data/mayc_data.csv ' )
len ( traits . token_id . unique ( ) )
opensea_data [ ' token_id ' ] = opensea_data . token_id . astype ( int )
opensea_data . token_id . max ( )
len ( opensea_data )
it = 0
max_it = 9458
for row in opensea_data . iterrows ( ) :
it + = 1
if it % 100 == 0 :
print ( ' # {} / {} ' . format ( it , len ( opensea_data ) ) )
if it < max_it :
continue
row = row [ 1 ]
urllib . request . urlretrieve ( row [ ' image_url ' ] , ' ./viz/www/img/ {} .png ' . format ( row [ ' token_id ' ] ) )
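# Note: the load_api_data() defined below has the same name as the one defined above,
# so it shadows the first definition when this file is executed top to bottom.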
def load_api_data ( ) :
results = [ ]
contract_address = ' 0x60e4d786628fea6478f785a6d7e704777c86a7c6 '
for o in [ ' asc ' , ' desc ' ] :
l = 1
it = 0
offset = 0
while l and offset < = 10000 :
if offset % 1000 == 0 :
print ( " # {} / {} " . format ( offset , 20000 ) )
r = requests . get ( ' https://api.opensea.io/api/v1/assets?asset_contract_address= {} &order_by=pk&order_direction= {} &offset= {} &limit=50 ' . format ( contract_address , o , offset ) )
assets = r . json ( ) [ ' assets ' ]
for a in assets :
token_metadata = { }
for t in a [ ' traits ' ] :
token_metadata [ t [ ' trait_type ' ] ] = t [ ' value ' ]
token_id = a [ ' token_id ' ]
d = {
' commission_rate ' : None
, ' contract_address ' : a [ ' asset_contract ' ] [ ' address ' ]
, ' contract_name ' : a [ ' asset_contract ' ] [ ' name ' ]
, ' created_at_block_id ' : 0
, ' created_at_timestamp ' : re . sub ( ' T ' , ' ' , str ( a [ ' asset_contract ' ] [ ' created_date ' ] ) )
, ' created_at_tx_id ' : ' '
, ' creator_address ' : a [ ' creator ' ] [ ' address ' ] if a [ ' creator ' ] else a [ ' asset_contract ' ] [ ' address ' ]
, ' creator_name ' : a [ ' creator ' ] [ ' address ' ] if a [ ' creator ' ] else a [ ' asset_contract ' ] [ ' name ' ]
, ' image_url ' : a [ ' image_url ' ]
, ' project_name ' : a [ ' asset_contract ' ] [ ' name ' ]
, ' token_id ' : token_id
, ' token_metadata ' : token_metadata
, ' token_metadata_uri ' : a [ ' image_original_url ' ]
, ' token_name ' : ' {} # {} ' . format ( a [ ' asset_contract ' ] [ ' symbol ' ] , token_id )
}
results . append ( d )
offset + = 50
n = 50
r = math . ceil ( len ( results ) / n )
blockchain = ' ethereum '
directory = ' mayc '
for i in range ( r ) :
newd = {
" model " : {
" blockchain " : blockchain ,
" sinks " : [
{
" destination " : " {database_name} .silver.nft_metadata " ,
" type " : " snowflake " ,
" unique_key " : " blockchain || contract_address || token_id "
}
] ,
} ,
" results " : results [ ( i * n ) : ( ( i * n ) + r ) ]
}
with open ( ' ./data/metadata/ {} / {} .txt ' . format ( directory , i ) , ' w ' ) as outfile :
outfile . write ( json . dumps ( newd ) )
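

# A sketch of one way the loaders above could be chained when this file is run as a
# script; the original appears to be driven interactively, so this block is an
# assumption rather than part of the original workflow:
if __name__ == '__main__':
    add_terra_sales()
    add_solana_sales()
    add_eth_sales()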