diff --git a/format_data.py b/format_data.py
index ff1a4bfb..d69dd94b 100644
--- a/format_data.py
+++ b/format_data.py
@@ -3,6 +3,7 @@ import os
 import math
 import json
 import pandas as pd
+from scrape_sol_nfts import clean_name
 import snowflake.connector
 
 os.chdir('/Users/kellenblumberg/git/nft-deal-score')
@@ -235,6 +236,80 @@ def levana():
         with open('./data/metadata/levana_dragon_eggs/{}.txt'.format(i), 'w') as outfile:
             outfile.write(json.dumps(newd))
 
+def solana():
+    mints = pd.read_csv('./data/solana_rarities.csv')
+    collection_info = pd.read_csv('./data/collection_info.csv')
+    metadata = pd.read_csv('./data/metadata.csv')
+    tokens = pd.read_csv('./data/tokens.csv')
+    tokens['token_id'] = tokens.token_id.astype(str)
+    metadata['token_id'] = metadata.token_id.astype(str)
+    metadata = metadata.merge(tokens)
+    metadata = metadata.merge(collection_info)
+    metadata['token_id'] = metadata.clean_token_id.fillna(metadata.token_id)
+    metadata = metadata[-metadata.feature_name.isin(['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'])]
+
+    metadata['token_id'] = metadata.token_id.astype(int)
+    mints['token_id'] = mints.token_id.astype(int)
+    mints['collection'] = mints.collection.apply(lambda x: clean_name(x) )
+
+    # metadata[['collection']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
+
+    # NB: intentionally starts from the third collection in the unique() ordering
+    for collection in metadata.collection.unique()[2:]:
+        print(collection)
+        mdf = metadata[metadata.collection == collection]
+        results = []
+        for token_id in mdf.token_id.unique():
+            cur = mdf[mdf.token_id == token_id]
+            token_metadata = {}
+            m = mints[(mints.collection == collection) & (mints.token_id == token_id) ]
+            if not len(m):
+                print(token_id)
+                continue
+            mint_address = m.mint_address.values[0]
+            for row in cur.iterrows():
+                row = row[1]
+                token_metadata[row['feature_name']] = row['feature_value']
+
+            # `row` holds the last metadata row; the collection-level fields
+            # used below are identical across rows for a given token
+            d = {
+                'commission_rate': None
+                , 'mint_address': mint_address
+                , 'contract_address': row['contract_address']
+                , 'contract_name': row['collection']
+                , 'created_at_block_id': 0
+                , 'created_at_timestamp': str(row['created_at_timestamp'])
+                , 'created_at_tx_id': ''
+                , 'creator_address': row['contract_address']
+                , 'creator_name': row['collection']
+                , 'image_url': row['image_url']
+                , 'project_name': row['collection']
+                , 'token_id': int(token_id)
+                , 'token_metadata': token_metadata
+                , 'token_metadata_uri': row['image_url']
+                , 'token_name': row['collection']
+            }
+            results.append(d)
+        print('Uploading {} results'.format(len(results)))
+
+        # write the results out in batches of n records per file
+        n = 50
+        r = math.ceil(len(results) / n)
+        for i in range(r):
+            newd = {
+                "model": {
+                    "blockchain": "solana",
+                    "sinks": [
+                        {
+                            "destination": "{database_name}.silver.nft_metadata",
+                            "type": "snowflake",
+                            "unique_key": "blockchain || contract_address || token_id"
+                        }
+                    ],
+                },
+                "results": results[(i * n):((i + 1) * n)]
+            }
+            with open('./data/metadata/{}/{}.txt'.format(collection, i), 'w') as outfile:
+                outfile.write(json.dumps(newd))
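+# Illustrative sketch (not part of the upload logic above): each batch file
+# written by solana() has this shape, with {database_name} left as a literal
+# placeholder (presumably resolved by the downstream loader):
+#   {
+#     "model": {
+#       "blockchain": "solana",
+#       "sinks": [{"destination": "{database_name}.silver.nft_metadata",
+#                  "type": "snowflake",
+#                  "unique_key": "blockchain || contract_address || token_id"}]
+#     },
+#     "results": [ {"mint_address": "...", "token_id": 0, "token_metadata": {...}, ...} ]
+#   }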
 def bayc():
     with open('./data/bayc.json') as f:
         j = json.load(f)
diff --git a/prepare_data.py b/prepare_data.py
index 57fe18f6..cc48f5b7 100644
--- a/prepare_data.py
+++ b/prepare_data.py
@@ -50,7 +50,7 @@ for c in m_df.collection.unique():
 print(m_df[(m_df.token_id=='1') & (m_df.collection == 'Solana Monkey Business')])
 print(m_df[(m_df.token_id=='10') & (m_df.collection == 'Aurory')])
 
-for c in [ 'nft_rank','adj_nft_rank_0','adj_nft_rank_1' ]:
+for c in [ 'nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2' ]:
     cur = rarities[[ 'collection','token_id',c ]].rename(columns={c: 'feature_value'})
     cur['feature_name'] = c
     m_df = m_df[ m_df.feature_name != c ]
diff --git a/scrape_sol_nfts.py b/scrape_sol_nfts.py
index 26523426..2b70f5a8 100644
--- a/scrape_sol_nfts.py
+++ b/scrape_sol_nfts.py
@@ -17,6 +17,19 @@ from selenium.webdriver.common.keys import Keys
 os.chdir('/Users/kellenblumberg/git/nft-deal-score')
 os.environ['PATH'] += os.pathsep + '/Users/kellenblumberg/shared/'
 
+# Updates
+# - Final updates to the NTR app
+# - Helped gather mint_address data and metadata for the Solana hackathon
+# - Updated the NFT Deal Score model to make it easy to add new collections
+# Accomplishments
+# - Version 1.0 of the NTR app is now live at https://rstudio-connect.flipside.kitchen/ntr/ thanks to @eric
+# Problems Encountered
+# - Still waiting for Harmony data to be released (was hoping it would be ready early this week)
+# Priorities
+# - Assist with Solana <3 week where needed ()
+# - Build DeFi Kingdoms query
+# Concerns
+
 # browser = webdriver.Chrome()
 
 # old = pd.read_csv('./data/tokens.csv')
@@ -380,9 +393,31 @@ def scrape_recent_sales():
         del o_sales['tmp']
     o_sales.to_csv('./data/sales.csv', index=False)
 
+def scrape_solanafloor():
+    browser.get('https://solanafloor.com/')
+    soup = BeautifulSoup(browser.page_source)
+    # the stats table is an ag-grid: the pinned-left columns hold the project
+    # link, the center columns hold the stats
+    d0 = soup.find_all('div', class_='ag-pinned-left-cols-container')
+    d1 = soup.find_all('div', class_='ag-center-cols-clipper')
+    d0 = d0[1]
+    d1 = d1[1]
+    rows0 = d0.find_all('div', class_='ag-row')
+    rows1 = d1.find_all('div', class_='ag-row')
+    data = []
+    for r in rows1:
+        cell1 = r.find_all('div', class_='ag-cell')
+        a = cell1[0].find_all('a')[0]
+        project = re.split('/', a.attrs['href'])[-1]
+        data += [[ project, int('Lite' in cell1[0].text) ]]
+    df = pd.DataFrame(data, columns=['project','is_lite'])
+    df.to_csv('./data/sf_projects.csv', index=False)
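+# Hedged note on ./data/sf_projects.csv: the scrape above only writes the
+# `project` and `is_lite` columns, while scrape_listings() below filters on
+# `collection` and `to_scrape` -- those columns are presumably renamed/added
+# by hand (or by another step) before scrape_listings() runs, e.g.:
+#   collection,is_lite,to_scrape
+#   degen-ape-academy,0,1
+#   pesky-penguins,0,1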
 
 def scrape_listings(browser, collections = [ 'aurory','thugbirdz','smb','degenapes','peskypenguinclub' ], alerted = [], is_listings = True):
     print('Scraping solanafloor listings...')
     data = []
+    m_data = []
     # collections = [ 'aurory','thugbirdz','meerkatmillionaires','aurory','degenapes' ]
     # collections = [ 'aurory','thugbirdz','smb','degenapes' ]
     # collections = [ 'smb' ]
@@ -391,7 +426,11 @@ def scrape_listings(browser, collections = [ 'aurory','thugbirdz','smb','degenap
     , 'degenapes': 'degen-ape-academy'
     , 'peskypenguinclub': 'pesky-penguins'
     }
-    collection = 'smb'
+    # scrape every flagged, non-lite solanafloor project with no rarities yet
+    sf_projects = pd.read_csv('./data/sf_projects.csv')
+    old = pd.read_csv('./data/solana_rarities.csv')
+    collections = sf_projects[(sf_projects.to_scrape==1) & (sf_projects.is_lite==0) & (-sf_projects.collection.isin(old.collection.unique()))].collection.unique()
     for collection in collections:
         if collection == 'boryokudragonz':
             continue
@@ -410,6 +449,7 @@ def scrape_listings(browser, collections = [ 'aurory','thugbirdz','smb','degenap
             page += 1
         for j in [20, 30, 30, 30, 30, 30, 30, 30] * 1:
             for _ in range(1):
                 soup = BeautifulSoup(browser.page_source)
                 # for row in browser.find_elements_by_class_name('ag-row'):
                 #     cells = row.find_elements_by_class_name('ag-cell')
                 #     data += [[ collection, token_id, price ]]
                 d0 = soup.find_all('div', class_='ag-pinned-left-cols-container')
                 d1 = soup.find_all('div', class_='ag-center-cols-clipper')
+                h1 = soup.find_all('div', class_='ag-header-row')
                 if not len(d0) or not len(d1):
                     continue
                 d0 = d0[0]
                 d1 = d1[0]
+                h1 = h1[1]
                 rows0 = d0.find_all('div', class_='ag-row')
                 rows1 = d1.find_all('div', class_='ag-row')
+                # column headers, used to label the metadata cells below
+                hs1 = h1.find_all('div', class_='ag-header-cell')
+                hs1 = [ x.text.strip() for x in hs1 ]
                 for k in range(len(rows0)):
                     # for row in soup.find_all('div', class_='ag-row'):
                     #     # print(row.text)
@@ -432,6 +476,7 @@ def scrape_listings(browser, collections = [ 'aurory','thugbirdz','smb','degenap
                     cell1 = rows1[k].find_all('div', class_='ag-cell')
                     if len(cell1) > 2:
                         token_id = cell0[0].text
+                        mint_address = re.split('/', cell0[0].find_all('a')[0].attrs['href'])[-1] if len(cell0[0].find_all('a')) else None
                         price = cell1[2 if is_listings else 0].text
                         if len(token_id) and len(price):
                             # token_id = int(token_id[0].text)
@@ -443,7 +488,12 @@ def scrape_listings(browser, collections = [ 'aurory','thugbirdz','smb','degenap
                             if not price and is_listings:
                                 continue
                             if not token_id in seen:
-                                data += [[ collection, token_id, price ]]
+                                if not is_listings:
+                                    data += [[ collection, token_id, mint_address, price ]]
+                                    for l in range(len(hs1)):
+                                        m_data += [[ collection, token_id, mint_address, hs1[l], cell1[l].text.strip() ]]
+                                else:
+                                    data += [[ collection, token_id, price ]]
                                 seen.append(token_id)
                             # else:
                             #     print(row.text)
@@ -459,12 +509,25 @@ def scrape_listings(browser, collections = [ 'aurory','thugbirdz','smb','degenap
             else:
                 has_more = False
                 break
-    if not is_listings:
-        old = pd.read_csv('./data/solana_rarities.csv')
-        rarities = pd.DataFrame(data, columns=['collection','token_id','nft_rank']).drop_duplicates()
-        rarities = rarities.append(old).drop_duplicates()
-        print(rarities.groupby('collection').token_id.count())
-        rarities.to_csv('./data/solana_rarities.csv', index=False)
+    if not is_listings:
+        # build the metadata table first: it defines `rem`, the collections
+        # with too few scraped tokens, which is also needed to filter rarities
+        old = pd.read_csv('./data/sf_metadata.csv')
+        metadata = pd.DataFrame(m_data, columns=['collection','token_id','mint_address','feature_name','feature_value']).drop_duplicates()
+        metadata = metadata[ -metadata.feature_name.isin(['Rank *','Owner','Listed On','Price','USD','Buy Link']) ]
+        metadata = metadata.append(old).drop_duplicates()
+        g = metadata[[ 'collection','token_id' ]].drop_duplicates().groupby('collection').token_id.count().reset_index().sort_values('token_id')
+        rem = g[g.token_id<99].collection.unique()
+        metadata = metadata[-metadata.collection.isin(rem)]
+        print(g)
+        metadata.to_csv('./data/sf_metadata.csv', index=False)
+
+        old = pd.read_csv('./data/solana_rarities.csv')
+        rarities = pd.DataFrame(data, columns=['collection','token_id','mint_address','nft_rank']).drop_duplicates()
+        rarities = rarities.append(old).drop_duplicates()
+        rarities = rarities[-rarities.collection.isin(rem)]
+        print(rarities.groupby('collection').token_id.count().reset_index().sort_values('token_id'))
+        rarities.to_csv('./data/solana_rarities.csv', index=False)
 
     old = pd.read_csv('./data/listings.csv')
     listings = pd.DataFrame(data, columns=['collection','token_id','price']).drop_duplicates()
@@ -894,6 +957,108 @@ def scratch():
     o_sales.head()
     o_sales.to_csv('./data/md_sales.csv', index=False)
 
+def create_mint_csv():
+    # relies on the module-level dict `d` mapping collection -> update_authority;
+    # mirrors the snapshot-parsing logic in scrape_mints below
+    mints = pd.DataFrame()
+    auth_to_mint = {}
+    for collection, update_authority in d.items():
+        auth_to_mint[update_authority] = collection
+    for fname in [ './data/mints/'+f for f in os.listdir('./data/mints') ]:
+        if not '.json' in fname:
+            continue
+        with open(fname, 'r') as f:
+            j = json.load(f)
+        cur = pd.DataFrame(j)
+        if len(cur):
+            cur.columns = ['mint_address']
+            cur['update_authority'] = re.split('/|_', fname)[3]
+            cur['collection'] = cur.update_authority.apply(lambda x: auth_to_mint[x] )
+            mints = mints.append(cur)
+    mints.to_csv('./data/solana_mints.csv', index=False)
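+# Hedged note: `metaboss snapshot mints` appears to write one JSON array of
+# mint-address strings per update authority, named like
+#   ./data/mints/<UPDATE_AUTHORITY>_mint_accounts.json
+# which is why re.split('/|_', fname)[3] recovers the authority, e.g.:
+#   >>> re.split('/|_', './data/mints/DRGNjvBvnXNiQz9dTppGk1tAsVxtJsvhEmojEfBU3ezf_mint_accounts.json')[3]
+#   'DRGNjvBvnXNiQz9dTppGk1tAsVxtJsvhEmojEfBU3ezf'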
+
+def scrape_how_rare_is():
+    d = {
+        'degenapes': 40
+        ,'aurory': 40
+    }
+    data = []
+    for collection, num_pages in d.items():
+        for page in range(num_pages):
+            if len(data):
+                print(data[-1])
+            url = 'https://howrare.is/{}/?page={}&ids=&sort_by=rank'.format(collection, page)
+            browser.get(url)
+            sleep(0.1)
+            soup = BeautifulSoup(browser.page_source)
+            for div in soup.find_all('div', class_='featured_item_img'):
+                image_url = div.find_all('img')[0].attrs['src']
+                token_id = re.split('/', div.find_all('a')[0].attrs['href'])[-2]
+                data += [[ collection, token_id, image_url ]]
+    df = pd.DataFrame(data, columns=['collection','token_id','image_url'])
+    df['collection'] = df.collection.apply(lambda x: clean_name(x) )
+    df['clean_token_id'] = df.token_id
+    df['chain'] = 'Solana'
+    tokens = pd.read_csv('./data/tokens.csv')
+    tokens = tokens[-tokens.collection.isin(df.collection.unique())]
+    tokens = tokens.append(df)
+    tokens.to_csv('./data/tokens.csv', index=False)
+
+
+def scrape_mints():
+    nft_mint_addresses = pd.read_csv('./data/nft_mint_addresses.csv')
+    nft_mint_addresses['collection'] = nft_mint_addresses.collection.apply(lambda x: clean_name(x) )
+
+    solana_nfts = pd.read_csv('./data/solana_nfts.csv')
+    solana_nfts = solana_nfts[solana_nfts.update_authority.notnull()]
+    solana_nfts = solana_nfts[solana_nfts.collection != 'Boryoku Baby Dragonz']
+    print(solana_nfts.groupby('update_authority').collection.count().reset_index().sort_values('collection', ascending=0).head(10))
+
+    nft_mint_addresses = nft_mint_addresses.merge( solana_nfts )
+    mints = pd.read_csv('./data/solana_mints.csv')
+    mints = mints[-mints.collection.isin(nft_mint_addresses.collection.unique())]
+    mints = mints.append(nft_mint_addresses[list(mints.columns)])
+    seen = list(mints.update_authority.unique())
+    rpc = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
+    d = {}
+    for row in solana_nfts.iterrows():
+        row = row[1]
+        d[row['collection']] = row['update_authority']
+
+    # snapshot mint addresses for any collection we have not covered yet
+    remaining = sorted(solana_nfts[-solana_nfts.collection.isin(mints.collection.unique())].collection.unique())
+    print('{}'.format(len(remaining)))
+    for collection in remaining:
+        update_authority = d[collection]
+        if update_authority in seen or collection in [ 'Solana Monkey Business','Thugbirdz','Degenerate Ape Academy','Pesky Penguins','Aurory' ]:
+            print('Seen '+collection)
+            continue
+        else:
+            print('Working on '+collection)
+            sleep(.10 * 60)
+            os.system('metaboss -r {} -t 300 snapshot mints --update-authority {} --output ~/git/nft-deal-score/data/mints '.format(rpc, update_authority))
+
+    # rebuild the mints table from the snapshot JSON files
+    mints = pd.DataFrame()
+    auth_to_mint = {}
+    for collection, update_authority in d.items():
+        auth_to_mint[update_authority] = collection
+    for fname in [ './data/mints/'+f for f in os.listdir('./data/mints') ]:
+        if not '.json' in fname:
+            continue
+        with open(fname, 'r') as f:
+            j = json.load(f)
+        cur = pd.DataFrame(j)
+        if len(cur):
+            cur.columns = ['mint_address']
+            cur['update_authority'] = re.split('/|_', fname)[3]
+            cur['collection'] = cur.update_authority.apply(lambda x: auth_to_mint[x] )
+            mints = mints.append(cur)
+    g = mints.groupby('collection').update_authority.count().reset_index()
+    g.to_csv('~/Downloads/tmp.csv', index=False)
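+    # sanity check (illustrative, commented out): every update authority should
+    # map to exactly one collection before overwriting the CSV
+    # assert mints.groupby('update_authority').collection.nunique().max() == 1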
mints.to_csv('./data/solana_mints.csv', index=False) + # scrape_listings(['smb']) # alerted = [] # for i in range(1): diff --git a/scripts/solana-rpc-app/src/index.js b/scripts/solana-rpc-app/src/index.js new file mode 100644 index 00000000..dde2c5d7 --- /dev/null +++ b/scripts/solana-rpc-app/src/index.js @@ -0,0 +1,10 @@ +"use strict"; +exports.__esModule = true; +exports.hello = void 0; +var world = 'world'; +function hello(world) { + if (world === void 0) { world = 'world'; } + return "Hello ".concat(world, "! "); +} +exports.hello = hello; +console.log("Hello!"); diff --git a/scripts/solana-rpc-app/src/index.ts b/scripts/solana-rpc-app/src/index.ts new file mode 100644 index 00000000..a9e25c35 --- /dev/null +++ b/scripts/solana-rpc-app/src/index.ts @@ -0,0 +1,117 @@ +import { Connection, clusterApiUrl, PublicKey } from '@solana/web3.js'; +// import bs58 from 'bs58'; + +const connection = new Connection(clusterApiUrl('mainnet-beta')); +const MAX_NAME_LENGTH = 32; +const MAX_URI_LENGTH = 200; +const MAX_SYMBOL_LENGTH = 10; +const MAX_CREATOR_LEN = 32 + 1 + 1; +const MAX_CREATOR_LIMIT = 5; +const MAX_DATA_SIZE = 4 + MAX_NAME_LENGTH + 4 + MAX_SYMBOL_LENGTH + 4 + MAX_URI_LENGTH + 2 + 1 + 4 + MAX_CREATOR_LIMIT * MAX_CREATOR_LEN; +const MAX_METADATA_LEN = 1 + 32 + 32 + MAX_DATA_SIZE + 1 + 1 + 9 + 172; +const CREATOR_ARRAY_START = 1 + 32 + 32 + 4 + MAX_NAME_LENGTH + 4 + MAX_URI_LENGTH + 4 + MAX_SYMBOL_LENGTH + 2 + 1 + 4; + +console.log(`MAX_METADATA_LEN = ${MAX_METADATA_LEN}`); + + +// const TOKEN_METADATA_PROGRAM = new PublicKey('cndy3Z4yapfJBmL3ShUp5exZKqR3z33thTzeNMm2gRZ'); +const candyMachineId = new PublicKey('trshC9cTgL3BPXoAbp5w9UfnUMWEJx5G61vUijXPMLH'); + +// const getMintAddresses = async (firstCreatorAddress: PublicKey) => { +// const metadataAccounts = await connection.getProgramAccounts( +// TOKEN_METADATA_PROGRAM, +// { +// // The mint address is located at byte 33 and lasts for 32 bytes. +// dataSlice: { offset: 33, length: 32 }, + +// filters: [ +// // Only get Metadata accounts. +// { dataSize: MAX_METADATA_LEN }, + +// // Filter using the first creator. 
+//       {
+//         memcmp: {
+//           offset: 1,
+//           bytes: firstCreatorAddress.toBase58(),
+//         },
+//       },
+//     ],
+//   },
+// );
+
+// return metadataAccounts.map((metadataAccountInfo) => (
+//   // bs58.encode(metadataAccountInfo.account.data)
+//   (metadataAccountInfo.account.data)
+// ));
+// };
+
+// getMintAddresses(candyMachineId);
+
+
+
+
+// import { Connection, clusterApiUrl, PublicKey } from '@solana/web3.js';
+// import bs58 from 'bs58';
+
+// const connection = new Connection(clusterApiUrl('mainnet-beta'));
+// const MAX_NAME_LENGTH = 32;
+// const MAX_URI_LENGTH = 200;
+// const MAX_SYMBOL_LENGTH = 10;
+// const MAX_CREATOR_LEN = 32 + 1 + 1;
+// const MAX_CREATOR_LIMIT = 5;
+// const MAX_DATA_SIZE = 4 + MAX_NAME_LENGTH + 4 + MAX_SYMBOL_LENGTH + 4 + MAX_URI_LENGTH + 2 + 1 + 4 + MAX_CREATOR_LIMIT * MAX_CREATOR_LEN;
+// const MAX_METADATA_LEN = 1 + 32 + 32 + MAX_DATA_SIZE + 1 + 1 + 9 + 172;
+// const CREATOR_ARRAY_START = 1 + 32 + 32 + 4 + MAX_NAME_LENGTH + 4 + MAX_URI_LENGTH + 4 + MAX_SYMBOL_LENGTH + 2 + 1 + 4;
+
+const TOKEN_METADATA_PROGRAM = new PublicKey('metaqbxxUerdq28cj1RbAWkYQm3ybzjb6a8bt518x1s');
+const CANDY_MACHINE_V2_PROGRAM = new PublicKey('cndy3Z4yapfJBmL3ShUp5exZKqR3z33thTzeNMm2gRZ');
+// const candyMachineId = new PublicKey('ENTER_YOUR_CANDY_MACHINE_ID_HERE');
+
+const getMintAddresses = async () => {
+  const metadataAccounts = await connection.getProgramAccounts(
+    // TOKEN_METADATA_PROGRAM,
+    // NB: this queries the SPL Token program, so the accounts returned are
+    // token accounts, not Metaplex metadata accounts
+    new PublicKey('TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA'),
+    {
+      // The mint address is located at byte 33 and lasts for 32 bytes.
+      // dataSlice: { offset: 33, length: 32 },
+
+      filters: [
+        // SPL Token accounts are 165 bytes
+        { dataSize: 165 },
+
+        // Filter on the pubkey at byte offset 1. NB: in a token account the
+        // mint occupies bytes 0-31 and the owner bytes 32-63, so offset 1
+        // does not align with either field.
+        {
+          memcmp: {
+            offset: 1,
+            bytes: new PublicKey('trshC9cTgL3BPXoAbp5w9UfnUMWEJx5G61vUijXPMLH').toBase58(),
+          },
+        },
+      ],
+    },
+  );
+  return metadataAccounts;
+
+  // unreachable after the return above; kept for reference
+  // return metadataAccounts.map((metadataAccountInfo) => (
+  //   // bs58.encode(metadataAccountInfo.account.data)
+  //   (metadataAccountInfo.account.data)
+  // ));
+};
+
+const getCandyMachineCreator = async (candyMachine: PublicKey): Promise<[PublicKey, number]> => (
+  PublicKey.findProgramAddress(
+    [Buffer.from('candy_machine'), candyMachine.toBuffer()],
+    CANDY_MACHINE_V2_PROGRAM,
+  )
+);
+
+(async () => {
+
+// const candyMachineCreator = await getCandyMachineCreator(candyMachineId);
+// console.log(`candyMachineCreator`);
+// console.log(candyMachineCreator.toString());
+
+  const a = await getMintAddresses();
+  console.log(`a`);
+  console.log(a);
+
+})();
\ No newline at end of file
diff --git a/scripts/solana-rpc-app/src/test.ts b/scripts/solana-rpc-app/src/test.ts
new file mode 100644
index 00000000..3a0ec6d9
--- /dev/null
+++ b/scripts/solana-rpc-app/src/test.ts
@@ -0,0 +1,30 @@
+import { Connection } from '@metaplex/js';
+import { Metadata } from '@metaplex-foundation/mpl-token-metadata';
+import { PublicKey } from '@solana/web3.js';
+
+(async () => {
+  const connection = new Connection('mainnet-beta');
+// const tokenMint = '9ARngHhVaCtH5JFieRdSS5Y8cdZk2TMF4tfGSWFB9iSK';
+  const tokenMint = '5XKoz4nuPFU78jcEVREMZoh9kKsYnCvrTAmpRzvVdJp1';
+  const metadataPDA = await Metadata.getPDA(new PublicKey(tokenMint));
+// Metadata.getCandyMachineCreator()
+// Metadata.getPDA()
+  const tokenMetadata = await Metadata.load(connection, metadataPDA);
+  console.log(tokenMetadata.data);
+  /*
+  MetadataData {
+    key: 4,
+    updateAuthority: '9uBX3ASjxWvNBAD1xjbVaKA74mWGZys3RGSF7DdeDD3F',
+    mint: '9ARngHhVaCtH5JFieRdSS5Y8cdZk2TMF4tfGSWFB9iSK',
+    data: MetadataDataData {
name: 'SMB #1355', + symbol: 'SMB', + uri: 'https://arweave.net/3wXyF1wvK6ARJ_9ue-O58CMuXrz5nyHEiPFQ6z5q02E', + sellerFeeBasisPoints: 500, + creators: [ [Creator] ] + }, + primarySaleHappened: 1, + isMutable: 1 + } + */ +})(); \ No newline at end of file diff --git a/scripts/solana-rpc-app/src/test2.ts b/scripts/solana-rpc-app/src/test2.ts new file mode 100644 index 00000000..d1d73e0f --- /dev/null +++ b/scripts/solana-rpc-app/src/test2.ts @@ -0,0 +1,57 @@ +import { Connection, clusterApiUrl, PublicKey } from '@solana/web3.js'; +// import bs58 from 'bs58'; + +const connection = new Connection(clusterApiUrl('mainnet-beta')); +const MAX_NAME_LENGTH = 32; +const MAX_URI_LENGTH = 200; +const MAX_SYMBOL_LENGTH = 10; +const MAX_CREATOR_LEN = 32 + 1 + 1; +const MAX_CREATOR_LIMIT = 5; +const MAX_DATA_SIZE = 4 + MAX_NAME_LENGTH + 4 + MAX_SYMBOL_LENGTH + 4 + MAX_URI_LENGTH + 2 + 1 + 4 + MAX_CREATOR_LIMIT * MAX_CREATOR_LEN; +const MAX_METADATA_LEN = 1 + 32 + 32 + MAX_DATA_SIZE + 1 + 1 + 9 + 172; +const CREATOR_ARRAY_START = 1 + 32 + 32 + 4 + MAX_NAME_LENGTH + 4 + MAX_URI_LENGTH + 4 + MAX_SYMBOL_LENGTH + 2 + 1 + 4; + +// const TOKEN_METADATA_PROGRAM = new PublicKey('metaqbxxUerdq28cj1RbAWkYQm3ybzjb6a8bt518x1s'); +const TOKEN_METADATA_PROGRAM = new PublicKey('TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA'); +const candyMachineId = new PublicKey('8mNmf15xNrMFQLNSNrHxxswy7a1NfaSFwXHkVUPeMWwU'); + +const getMintAddresses = async (firstCreatorAddress: PublicKey) => { + const metadataAccounts = await connection.getProgramAccounts( + TOKEN_METADATA_PROGRAM, + { + // The mint address is located at byte 33 and lasts for 32 bytes. + // dataSlice: { offset: 33, length: 32 }, + + filters: [ + // Only get Metadata accounts. + // { dataSize: MAX_METADATA_LEN }, + { dataSize: 165 }, + + // Filter using the first creator. + { + memcmp: { + // offset: CREATOR_ARRAY_START, + // bytes: firstCreatorAddress.toBase58(), + offset: 1, + bytes: new PublicKey('4FYjfa71puV4PD12cyqXotu6z2FhLiqFSHjEfYiFLnbj').toBase58(), + }, + }, + ], + }, + ); + return metadataAccounts; + +// return metadataAccounts.map((metadataAccountInfo) => ( +// bs58.encode(metadataAccountInfo.account.data) +// )); +}; + + +(async () => { + + const a = await getMintAddresses(candyMachineId); + console.log(`a`); + console.log(a); + console.log(a.length); + + })(); \ No newline at end of file diff --git a/scripts/solana-rpc-app/tsconfig.json b/scripts/solana-rpc-app/tsconfig.json new file mode 100644 index 00000000..b6152daf --- /dev/null +++ b/scripts/solana-rpc-app/tsconfig.json @@ -0,0 +1,101 @@ +{ + "compilerOptions": { + /* Visit https://aka.ms/tsconfig.json to read more about this file */ + + /* Projects */ + // "incremental": true, /* Enable incremental compilation */ + // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */ + // "tsBuildInfoFile": "./", /* Specify the folder for .tsbuildinfo incremental compilation files. */ + // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects */ + // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */ + // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ + + /* Language and Environment */ + "target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. 
*/ + // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ + // "jsx": "preserve", /* Specify what JSX code is generated. */ + // "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */ + // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ + // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h' */ + // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ + // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using `jsx: react-jsx*`.` */ + // "reactNamespace": "", /* Specify the object invoked for `createElement`. This only applies when targeting `react` JSX emit. */ + // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ + // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ + + /* Modules */ + "module": "commonjs", /* Specify what module code is generated. */ + // "rootDir": "./", /* Specify the root folder within your source files. */ + // "moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */ + // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */ + // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ + // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ + // "typeRoots": [], /* Specify multiple folders that act like `./node_modules/@types`. */ + // "types": [], /* Specify type package names to be included without being referenced in a source file. */ + // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ + // "resolveJsonModule": true, /* Enable importing .json files */ + // "noResolve": true, /* Disallow `import`s, `require`s or ``s from expanding the number of files TypeScript should add to a project. */ + + /* JavaScript Support */ + // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the `checkJS` option to get errors from these files. */ + // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */ + // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from `node_modules`. Only applicable with `allowJs`. */ + + /* Emit */ + // "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ + // "declarationMap": true, /* Create sourcemaps for d.ts files. */ + // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ + "sourceMap": true, /* Create source map files for emitted JavaScript files. */ + // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If `declaration` is true, also designates a file that bundles all .d.ts output. */ + "outDir": "dist", /* Specify an output folder for all emitted files. */ + // "removeComments": true, /* Disable emitting comments. */ + // "noEmit": true, /* Disable emitting files from a compilation. */ + // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. 
*/ + // "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types */ + // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */ + // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */ + // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ + // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */ + // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */ + // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */ + // "newLine": "crlf", /* Set the newline character for emitting files. */ + // "stripInternal": true, /* Disable emitting declarations that have `@internal` in their JSDoc comments. */ + // "noEmitHelpers": true, /* Disable generating custom helper functions like `__extends` in compiled output. */ + // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ + // "preserveConstEnums": true, /* Disable erasing `const enum` declarations in generated code. */ + // "declarationDir": "./", /* Specify the output directory for generated declaration files. */ + // "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */ + + /* Interop Constraints */ + // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ + // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ + "esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables `allowSyntheticDefaultImports` for type compatibility. */ + // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ + "forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */ + + /* Type Checking */ + "strict": true, /* Enable all strict type-checking options. */ + // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied `any` type.. */ + // "strictNullChecks": true, /* When type checking, take into account `null` and `undefined`. */ + // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ + // "strictBindCallApply": true, /* Check that the arguments for `bind`, `call`, and `apply` methods match the original function. */ + // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */ + // "noImplicitThis": true, /* Enable error reporting when `this` is given the type `any`. */ + // "useUnknownInCatchVariables": true, /* Type catch clause variables as 'unknown' instead of 'any'. */ + // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ + // "noUnusedLocals": true, /* Enable error reporting when a local variables aren't read. */ + // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read */ + // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ + // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. 
*/ + // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ + // "noUncheckedIndexedAccess": true, /* Include 'undefined' in index signature results */ + // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ + // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type */ + // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ + // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ + + /* Completeness */ + // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ + "skipLibCheck": true /* Skip type checking all .d.ts files. */ + } +} diff --git a/solana_model.py b/solana_model.py index b85f8d9e..0f8357ad 100644 --- a/solana_model.py +++ b/solana_model.py @@ -2,10 +2,12 @@ import collections import os import re import json +from textwrap import indent import warnings import requests import numpy as np import pandas as pd +import kutils as ku import urllib.request import tensorflow as tf import snowflake.connector @@ -13,7 +15,7 @@ import snowflake.connector from curses import meta from copy import deepcopy from datetime import datetime -from sklearn.ensemble import RandomForestRegressor +from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor from sklearn.linear_model import LinearRegression, RidgeCV, Lasso, Ridge from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV @@ -27,649 +29,694 @@ warnings.filterwarnings('ignore') # Define Helper Functions # ################################### def standardize_df(df, cols, usedf=None, verbose=False): - for c in cols: - if type(usedf) != type(pd.DataFrame()): - usedf = df - mu = usedf[c].mean() - sd = usedf[c].std() - if verbose: - print(c) - if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0: - # df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 ) - df['std_{}'.format(c)] = df[c] - else: - df['std_{}'.format(c)] = (df[c] - mu) / sd - return(df) + for c in cols: + if type(usedf) != type(pd.DataFrame()): + usedf = df + mu = usedf[c].mean() + sd = usedf[c].std() + if verbose: + print(c) + if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0: + # df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 ) + df['std_{}'.format(c)] = df[c] + else: + df['std_{}'.format(c)] = (df[c] - mu) / sd + return(df) -def merge(left, right, on=None, how='inner', ensure=True, verbose=True): - df = left.merge(right, on=on, how=how) - if len(df) != len(left) and (ensure or verbose): - print('{} -> {}'.format(len(left), len(df))) - cur = left.merge(right, on=on, how='left') - cols = set(right.columns).difference(set(left.columns)) - print(cols) - col = list(cols)[0] - missing = cur[cur[col].isnull()] - print(missing.head()) - if ensure: - assert(False) - return(df) +def merge(left, right, on=None, how='inner', ensure=True, verbose=True, message = ''): + df = left.merge(right, on=on, how=how) + if len(df) != len(left) and (ensure or verbose): + if message: + print(message) + print('{} -> {}'.format(len(left), len(df))) + cur = left.merge(right, on=on, how='left') + cols = set(right.columns).difference(set(left.columns)) + print(cols) + if ensure: + col = list(cols)[0] + missing = cur[cur[col].isnull()] + print(missing.head()) + assert(False) + return(df) def just_float(x): - x = 
re.sub('[^\d\.]', '', str(x)) - return(float(x)) + x = re.sub('[^\d\.]', '', str(x)) + return(float(x)) def calculate_percentages(df, cols=[]): - add_pct = not 'pct' in df.columns - if not len(cols): - cols = df.columns - if add_pct: - df['pct'] = 1 - for c in cols: - g = df[c].value_counts().reset_index() - g.columns = [ c, 'N' ] - col = '{}_pct'.format(c) - g[col] = g.N / g.N.sum() - df = df.merge( g[[ c, col ]] ) - if add_pct: - df['pct'] = df.pct * df[col] - return(df) + add_pct = not 'pct' in df.columns + if not len(cols): + cols = df.columns + if add_pct: + df['pct'] = 1 + for c in cols: + g = df[c].value_counts().reset_index() + g.columns = [ c, 'N' ] + col = '{}_pct'.format(c) + g[col] = g.N / g.N.sum() + df = df.merge( g[[ c, col ]] ) + if add_pct: + df['pct'] = df.pct * df[col] + return(df) def get_sales(check_exclude = True, exclude=[]): - s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'}) - s_df['token_id'] = s_df.token_id.astype(str) - s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x)) - s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])] - s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ] - s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]] - for e in exclude: - s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )] - s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ] + s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'}) + s_df['token_id'] = s_df.token_id.astype(str) + s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x)) + s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])] + s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ] + s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]] + for e in exclude: + s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )] + s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ] - # exclude wierd data points - if not check_exclude: - exclude = pd.read_csv('./data/exclude.csv') - exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x)) - exclude['token_id'] = exclude.token_id.astype(str) - s_df = s_df.merge(exclude, how='left') - s_df = s_df[s_df.exclude.isnull()] - del s_df['exclude'] + # exclude wierd data points + if not check_exclude: + exclude = pd.read_csv('./data/exclude.csv') + exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x)) + exclude['token_id'] = exclude.token_id.astype(str) + s_df = s_df.merge(exclude, how='left') + s_df = s_df[s_df.exclude.isnull()] + del s_df['exclude'] - ########################### - # Calculate Floor # - ########################### - s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') ) - s_df['timestamp'] = s_df.block_timestamp.astype(int) - s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) + ########################### + # Calculate Floor # + ########################### + s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') ) + s_df['timestamp'] = s_df.block_timestamp.astype(int) + s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: 
(datetime.today() - x).days ).astype(int) - # lowest price in last 20 sales - s_df = s_df.sort_values(['collection','block_timestamp']) - s_df['mn_20'] = s_df.groupby('collection').price.shift(1) - s_df = s_df.sort_values(['collection','block_timestamp']) - s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) + # lowest price in last 20 sales + s_df = s_df.sort_values(['collection','block_timestamp']) + s_df['mn_20'] = s_df.groupby('collection').price.shift(1) + s_df = s_df.sort_values(['collection','block_timestamp']) + s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) - # exclude sales that are far below the existing floor - s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ] + # exclude sales that are far below the existing floor + s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ] - # 10%ile of last 20 sales - s_df = s_df.sort_values(['collection','block_timestamp']) - s_df['mn_20'] = s_df.groupby('collection').price.shift(1) - s_df = s_df.sort_values(['collection','block_timestamp']) - s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True) - s_df['sim'] = 0 - s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] ) - s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False) - return(s_df) + # 10%ile of last 20 sales + s_df = s_df.sort_values(['collection','block_timestamp']) + s_df['mn_20'] = s_df.groupby('collection').price.shift(1) + s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.0525).reset_index(0,drop=True) + s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.0525).reset_index(0,drop=True) + s_df['sim'] = 0 + s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] ) + s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False) + return(s_df) + +def get_coefs(cols, coef): + coefs = [] + for a, b in zip(cols, coef): + coefs += [[a,b]] + coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0) + # coefs.to_csv('~/Downloads/{}_lin_coefs.csv'.format(collection), index=False) + # coefs['tmp'] = coefs.col.apply(lambda x: 'nft_rank' in x ) + # coefs['mult'] = coefs.col.apply(lambda x: -1 if x == 'std_nft_rank' else 1 ) + coefs['mult'] = coefs.apply(lambda x: -1 if x['col'] == 'std_nft_rank' else 1 if x['coef'] >= 0 or 'adj_nft_rank' in x['col'] or 'is_top_' in x['col'] or 'y_pred_' in x['col'] else -1 , 1 ) + coefs['val'] = coefs.mult * coefs.coef + coefs = coefs.sort_values('val', ascending=0) + return(coefs) def train_model(check_exclude, supplement_with_listings): - exclude = [ - ( 'aurory', 2239, 3500 ) - ] - s_df = get_sales(check_exclude, exclude) - # s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'}) - # s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x)) - # s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])] - # s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ] - # s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]] - # for e in exclude: - # s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )] - # s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ] + exclude = [ + ( 'aurory', 2239, 3500 ) + , ( 'aurory', 1876, 
789 ) + , ( 'aurory', 2712, 500 ) + , ( 'aurory', 5368, 500 ) + , ( 'aurory', 9239, 1700 ) + ] + s_df = get_sales(check_exclude, exclude) + # s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'}) + # s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x)) + # s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])] + # s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ] + # s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]] + # for e in exclude: + # s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )] + # s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ] - # # exclude wierd data points - # if not check_exclude: - # exclude = pd.read_csv('./data/exclude.csv') - # exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x)) - # s_df = s_df.merge(exclude, how='left') - # s_df = s_df[s_df.exclude.isnull()] - # del s_df['exclude'] + # # exclude wierd data points + # if not check_exclude: + # exclude = pd.read_csv('./data/exclude.csv') + # exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x)) + # s_df = s_df.merge(exclude, how='left') + # s_df = s_df[s_df.exclude.isnull()] + # del s_df['exclude'] - ######################### - # Load Metadata # - ######################### - m_df = pd.read_csv('./data/metadata.csv') - m_df['token_id'] = m_df.token_id.astype(str) - m_df['collection'] = m_df.collection.apply(lambda x: clean_name(x)) - m_df['token_id'] = m_df.token_id.astype(str) - # remove ones that are not actually metadata - m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ] - m_df['feature_value'] = m_df.feature_value.apply(lambda x: re.split("\(", re.sub("\"", "", x))[0] if type(x)==str else x ) - m_df[(m_df.feature_name=='rank') & (m_df.collection == 'Levana Dragon Eggs')] - sorted(m_df[ (m_df.collection == 'Solana Monkey Business') ].feature_name.unique()) + ######################### + # Load Metadata # + ######################### + m_df = pd.read_csv('./data/metadata.csv') + # m_df[m_df.collection == 'Aurory'][['collection','feature_name']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False) + sorted([x for x in m_df.feature_name.unique() if 'nft_' in x]) + m_df['token_id'] = m_df.token_id.astype(str) + m_df['collection'] = m_df.collection.apply(lambda x: clean_name(x)) + # remove ones that are not actually metadata + m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ] + m_df['feature_value'] = m_df.feature_value.apply(lambda x: re.split("\(", re.sub("\"", "", x))[0] if type(x)==str else x ) + m_df[(m_df.feature_name=='rank') & (m_df.collection == 'Levana Dragon Eggs')] + sorted(m_df[ (m_df.collection == 'Solana Monkey Business') ].feature_name.unique()) - ##################################### - # Exclude Special LunaBulls # - ##################################### - tokens = pd.read_csv('./data/tokens.csv') - tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x)) - tokens.token_id.unique() - lunabullsrem = tokens[tokens.clean_token_id>=10000].token_id.unique() - m_df = m_df[ -((m_df.collection == 'LunaBulls') & (m_df.token_id.isin(lunabullsrem))) ] - s_df = s_df[ -((s_df.collection == 'LunaBulls') & (s_df.token_id.isin(lunabullsrem))) ] - s_df = s_df.drop_duplicates(subset=['collection','token_id','price']) + ##################################### + # Exclude Special LunaBulls # + 
##################################### + tokens = pd.read_csv('./data/tokens.csv') + tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x)) + tokens['token_id'] = tokens.token_id.astype(str) + m_df = merge(m_df, tokens[['collection','token_id','clean_token_id']], how='left', ensure=True, on=['collection','token_id'], message='m_df x tokens') + m_df['token_id'] = m_df.clean_token_id.fillna(m_df.token_id).astype(int).astype(str) + s_df = merge(s_df, tokens[['collection','token_id','clean_token_id']], how='left', ensure=True, on=['collection','token_id'], message='s_df x tokens') + s_df['token_id'] = s_df.clean_token_id.fillna(s_df.token_id).astype(int).astype(str) + tokens.token_id.unique() + lunabullsrem = tokens[tokens.clean_token_id>=10000].token_id.unique() + m_df = m_df[ -((m_df.collection == 'LunaBulls') & (m_df.token_id.isin(lunabullsrem))) ] + s_df = s_df[ -((s_df.collection == 'LunaBulls') & (s_df.token_id.isin(lunabullsrem))) ] + s_df = s_df.drop_duplicates(subset=['collection','token_id','price']) - ########################### - # Calculate Floor # - ########################### - # s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') ) - # s_df['timestamp'] = s_df.block_timestamp.astype(int) - # s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) + ########################### + # Calculate Floor # + ########################### + # s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') ) + # s_df['timestamp'] = s_df.block_timestamp.astype(int) + # s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) - # # lowest price in last 20 sales - # s_df = s_df.sort_values(['collection','block_timestamp']) - # s_df['mn_20'] = s_df.groupby('collection').price.shift(1) - # s_df = s_df.sort_values(['collection','block_timestamp']) - # s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) + # # lowest price in last 20 sales + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = s_df.groupby('collection').price.shift(1) + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) - # # exclude sales that are far below the existing floor - # s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ] + # # exclude sales that are far below the existing floor + # s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ] - # # 10%ile of last 20 sales - # s_df = s_df.sort_values(['collection','block_timestamp']) - # s_df['mn_20'] = s_df.groupby('collection').price.shift(1) - # s_df = s_df.sort_values(['collection','block_timestamp']) - # s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True) - # s_df['sim'] = 0 - # s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] ) - # s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False) + # # 10%ile of last 20 sales + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = s_df.groupby('collection').price.shift(1) + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = 
s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True) + # s_df['sim'] = 0 + # s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] ) + # s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False) - if supplement_with_listings: - pred_price = pd.read_csv('./data/pred_price.csv') - pred_price['collection'] = pred_price.collection.apply(lambda x: clean_name(x)) - listings = pd.read_csv('./data/listings.csv') - listings['collection'] = listings.collection.apply(lambda x: clean_name(x)) - listings['block_timestamp'] = s_df.block_timestamp.max() - floor = s_df.sort_values('timestamp').groupby('collection').tail(1)[['collection','mn_20']] - tmp = merge(listings, pred_price, ensure=False) - tmp = tmp[tmp.price < tmp.pred_price] - tmp['timestamp'] = tmp.block_timestamp.astype(int) - tmp['days_ago'] = tmp.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) - tmp = merge(tmp, floor) + listings = pd.read_csv('./data/listings.csv') + if supplement_with_listings: + pred_price = pd.read_csv('./data/pred_price.csv') + pred_price['collection'] = pred_price.collection.apply(lambda x: clean_name(x)) + listings['collection'] = listings.collection.apply(lambda x: clean_name(x)) + listings['block_timestamp'] = s_df.block_timestamp.max() + listings = listings[listings.collection.isin(pred_price.collection.unique())] + floor = s_df.sort_values('timestamp').groupby('collection').tail(1)[['collection','mn_20']] + tmp = merge(listings, pred_price, ensure=False) + tmp = tmp[tmp.price < tmp.pred_price] + tmp['timestamp'] = tmp.block_timestamp.astype(int) + tmp['days_ago'] = tmp.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) + tmp = merge(tmp, floor) - n = round(len(s_df) / 5000) - n = max(1, min(2, n)) - # n = 1 - for _ in range(n): - s_df = s_df.append(tmp[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]]) - # tmp_1 = tmp[tmp.price <= 0.8 * tmp.pred_price] - # s_df = s_df.append(tmp_1[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]]) - # tmp_2 = tmp[tmp.price <= 0.6 * tmp.pred_price] - # tmp_2 = s_df.append(tmp_2[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]]) + n = round(len(s_df) / 5000) + n = max(1, min(3, n)) + print('Supplement with {}x listings'.format(n)) + # n = 1 + for _ in range(n): + s_df = s_df.append(tmp[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]]) + # tmp_1 = tmp[tmp.price <= 0.8 * tmp.pred_price] + # s_df = s_df.append(tmp_1[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]]) + # tmp_2 = tmp[tmp.price <= 0.6 * tmp.pred_price] + # tmp_2 = s_df.append(tmp_2[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]]) - ########################### - # Calculate Floor # - ########################### - coefsdf = pd.DataFrame() - salesdf = pd.DataFrame() - attributes = pd.DataFrame() - pred_price = pd.DataFrame() - feature_values = pd.DataFrame() - # non-binary in model: collection_rank, temperature, weight - # non-binary in model; exclude from rarity: pct, rank, score - # exclude from model: lucky_number, shower - # exclude from model and rarity %: meteor_id, attribute_count, cracking_date - ALL_NUMERIC_COLS = ['rank','score','pct'] - ALL_NUMERIC_COLS = ['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'] - MODEL_EXCLUDE_COLS = { - # 'Levana Dragon Eggs': 
['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature'] - 'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count','rarity_score_rank','rarity_score','weight'] - , 'Solana Monkey Business': ['Clothes_Diamond'] - } - MODEL_INCLUDE_COLS = { - # 'Solana Monkey Business': ['std_Hat_Strawhat','std_Hat_Space Warrior Hair','std_Clothes_Diamond','std_Eyes_Solana Vipers','std_Eyes_Vipers','std_Hat_Sombrero','std_Eyes_3D Glasses','std_Hat_Cowboy Hat','std_Eyes_Laser Eyes','std_matching_cop','std_matching_white','std_matching_black'] - 'Solana Monkey Business': ['std_Hat_Space Warrior Hair','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_Sombrero','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Eyes_Laser Eyes','std_Type_Solana'] - } - RARITY_EXCLUDE_COLS = { - # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature'] - 'Levana Dragon Eggs': ['meteor_id','attribute_count','collection_rank','transformed_collection_rank','rarity_score','rarity_score_rank','collection_rank_group'] - } - NUMERIC_COLS = { - 'Levana Dragon Eggs': ['collection_rank','temperature','transformed_collection_rank'] - } - ATT_EXCLUDE_COLS = { - 'Levana Dragon Eggs': ['attribute_count','transformed_collection_rank','collection_rank_group'] - } - collection = 'Solana Monkey Business' - # for collection in s_df.collection.unique(): - for collection in [ 'Solana Monkey Business' ]: - print('Working on collection {}'.format(collection)) - sales = s_df[ s_df.collection == collection ] - metadata = m_df[ m_df.collection == collection ] - metadata.groupby(['feature_name','feature_value']).token_id.count().reset_index().to_csv('~/Downloads/tmp.csv', index=False) - metadata[metadata.token_id == '1'] - metadata['feature_name'] = metadata.feature_name.apply(lambda x: x.strip() ) - metadata[metadata.token_id == '1'] - metadata[metadata.feature_name == 'rank'] - metadata.feature_name.unique() - metadata[(metadata.token_id=='1') & (metadata.collection == 'Solana Monkey Business')] + ########################### + # Calculate Floor # + ########################### + # coefsdf = pd.DataFrame() + # salesdf = pd.DataFrame() + # attributes = pd.DataFrame() + # pred_price = pd.DataFrame() + # feature_values = pd.DataFrame() + coefsdf = pd.read_csv('./data/coefsdf.csv') + salesdf = pd.read_csv('./data/model_sales.csv') + attributes = pd.read_csv('./data/attributes.csv') + pred_price = pd.read_csv('./data/pred_price.csv') + feature_values = pd.read_csv('./data/feature_values.csv') + # non-binary in model: collection_rank, temperature, weight + # non-binary in model; exclude from rarity: pct, rank, score + # exclude from model: lucky_number, shower + # exclude from model and rarity %: meteor_id, attribute_count, cracking_date + ALL_NUMERIC_COLS = ['rank','score','pct'] + ALL_NUMERIC_COLS = ['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'] + MODEL_EXCLUDE_COLS = { + # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature'] + 'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count','rarity_score_rank','rarity_score','weight'] + , 'Solana Monkey Business': ['Clothes_Diamond'] + } + MODEL_INCLUDE_COLS = { + # 'Solana Monkey Business': ['std_Hat_Strawhat','std_Hat_Space Warrior Hair','std_Clothes_Diamond','std_Eyes_Solana 
Vipers','std_Eyes_Vipers','std_Hat_Sombrero','std_Eyes_3D Glasses','std_Hat_Cowboy Hat','std_Eyes_Laser Eyes','std_matching_cop','std_matching_white','std_matching_black']
+        'Solana Monkey Business': ['std_Hat_Space Warrior Hair','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_Sombrero','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Eyes_Laser Eyes','std_Type_Solana']
+    }
+    RARITY_EXCLUDE_COLS = {
+        # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
+        'Levana Dragon Eggs': ['meteor_id','attribute_count','collection_rank','transformed_collection_rank','rarity_score','rarity_score_rank','collection_rank_group']
+    }
+    NUMERIC_COLS = {
+        'Levana Dragon Eggs': ['collection_rank','temperature','transformed_collection_rank']
+    }
+    ATT_EXCLUDE_COLS = {
+        'Levana Dragon Eggs': ['attribute_count','transformed_collection_rank','collection_rank_group']
+    }
+
+    # swap `collections` into the loop below to re-fit every collection that
+    # has both sales and metadata; the literal list re-fits a single collection
+    collections = list(s_df[['collection']].drop_duplicates().merge(m_df[['collection']].drop_duplicates()).collection.unique())
+    for collection in [ 'LunaBulls' ]:
+        # drop any previously saved rows for this collection before re-fitting;
+        # rebinding a loop variable (`for df in [...]: df = df[...]`) would not
+        # modify the underlying frames, so filter each frame explicitly
+        coefsdf = coefsdf[coefsdf.collection != collection] if 'collection' in coefsdf.columns else coefsdf
+        salesdf = salesdf[salesdf.collection != collection] if 'collection' in salesdf.columns else salesdf
+        attributes = attributes[attributes.collection != collection] if 'collection' in attributes.columns else attributes
+        pred_price = pred_price[pred_price.collection != collection] if 'collection' in pred_price.columns else pred_price
+        feature_values = feature_values[feature_values.collection != collection] if 'collection' in feature_values.columns else feature_values
+        print('Working on collection {}'.format(collection))
+        sales = s_df[ s_df.collection == collection ]
+        metadata = m_df[ m_df.collection == collection ]
+        metadata['feature_name'] = metadata.feature_name.apply(lambda x: x.strip() )
+
+        # categorize columns
+        all_names = sorted(metadata.feature_name.unique())
+        model_exclude = MODEL_EXCLUDE_COLS[collection] if collection in MODEL_EXCLUDE_COLS.keys() else []
+        num_features = sorted((NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []) + ALL_NUMERIC_COLS)
+        num_features = [ x for x in num_features if x in metadata.feature_name.unique() ]
+        num_metadata = metadata[metadata.feature_name.isin(num_features)]
+        cat_features = sorted([ x for x in all_names if not x in 
-    # create dummies for binary variables
-    num_metadata = num_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
-    num_metadata.columns = [ 'collection','token_id' ] + num_features
+        # categorize columns
+        all_names = sorted(metadata.feature_name.unique())
+        model_exclude = MODEL_EXCLUDE_COLS[collection] if collection in MODEL_EXCLUDE_COLS.keys() else []
+        num_features = sorted((NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []) + ALL_NUMERIC_COLS)
+        num_features = [ x for x in num_features if x in metadata.feature_name.unique() ]
+        num_metadata = metadata[metadata.feature_name.isin(num_features)]
+        num_metadata[num_metadata.feature_name == 'nft_rank']
+        cat_features = sorted([ x for x in all_names if not x in (model_exclude + num_features) ])
+        cat_metadata = metadata[metadata.feature_name.isin(cat_features)]
-    # create dummies for binary variables
-    cat_metadata = cat_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
-    cat_metadata.columns = [ 'collection','token_id' ] + cat_features
-    cat_metadata = calculate_percentages( cat_metadata, cat_features )
-    dummies = pd.get_dummies(cat_metadata[cat_features])
-    dummies.head(1).to_csv('~/Downloads/tmp2.csv', index=False)
-    if collection == 'Solana Monkey Business':
-        dummies['matching_cop'] = ((dummies['Clothes_Cop Vest'] == 1) & (dummies['Hat_Cop Hat'] == 1)).astype(int)
-        dummies['matching_white'] = ((dummies['Clothes_Beige Smoking'] == 1) & ((dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2']) == 1)).astype(int)
-        dummies['matching_black'] = ((dummies['Clothes_Black Smoking'] == 1) & ((dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_Black Top Hat']) == 1)).astype(int)
-        dummies['matching_top'] = ((dummies['matching_black'] == 1) | (dummies['matching_white']== 1)).astype(int)
-        # dummies['matching_green'] = ((dummies['Clothes_Green Smoking'] == 1) & ((dummies['Hat_Green Top Hat']) == 1)).astype(int)
-        # dummies['naked_1_att'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Clothes_None'] == 1)).astype(int)
-        # dummies['naked_1_att_hat'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Hat_None'] == 0)).astype(int)
-        dummies['fedora'] = (dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2'] + dummies['Hat_White Fedora 2'] >= 1 ).astype(int)
-        dummies['backwards_cap'] = (dummies['Hat_Black Backwards Cap'] + dummies['Hat_Blue Backwards Cap'] + dummies['Hat_Green Backwards Cap'] + dummies['Hat_Orange Backwards Cap'] + dummies['Hat_Purple Backwards Cap'] + dummies['Hat_Solana Backwards Cap'] >= 1 ).astype(int)
-        del dummies['matching_white']
-        del dummies['matching_black']
-    cat_metadata = pd.concat([ cat_metadata.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
-    del cat_metadata['pct']
+        # create dummies for binary variables
+        num_metadata = num_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
+        num_metadata.columns = [ 'collection','token_id' ] + num_features
-    for c in model_exclude:
-        if c in dummies.columns:
-            del dummies[c]
-    pred_cols = num_features + list(dummies.columns)
+        # create dummies for binary variables
+        cat_metadata = cat_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
+        cat_metadata.columns = [ 'collection','token_id' ] + cat_features
+        cat_metadata = calculate_percentages( cat_metadata, cat_features )
+        dummies = pd.get_dummies(cat_metadata[cat_features])
+        # dummies.head(1).to_csv('~/Downloads/tmp2.csv', index=False)
+        if collection == 'Solana Monkey Business':
+            dummies['matching_cop'] = ((dummies['Clothes_Cop Vest'] == 1) & (dummies['Hat_Cop Hat'] == 1)).astype(int)
+            dummies['matching_white'] = ((dummies['Clothes_Beige Smoking'] == 1) & ((dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2']) == 1)).astype(int)
+            dummies['matching_black'] = ((dummies['Clothes_Black Smoking'] == 1) & ((dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_Black Top Hat']) == 1)).astype(int)
+            dummies['matching_top'] = ((dummies['matching_black'] == 1) | (dummies['matching_white']== 1)).astype(int)
+            # dummies['matching_green'] = ((dummies['Clothes_Green Smoking'] == 1) & ((dummies['Hat_Green Top Hat']) == 1)).astype(int)
+            # dummies['naked_1_att'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Clothes_None'] == 1)).astype(int)
+            # dummies['naked_1_att_hat'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Hat_None'] == 0)).astype(int)
+            dummies['fedora'] = (dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2'] >= 1 ).astype(int)
+            dummies['backwards_cap'] = (dummies['Hat_Black Backwards Cap'] + dummies['Hat_Blue Backwards Cap'] + dummies['Hat_Green Backwards Cap'] + dummies['Hat_Orange Backwards Cap'] + dummies['Hat_Purple Backwards Cap'] + dummies['Hat_Solana Backwards Cap'] >= 1 ).astype(int)
+            del dummies['matching_white']
+            del dummies['matching_black']
+        cat_metadata = pd.concat([ cat_metadata.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
+        del cat_metadata['pct']
-    # create training df
-    df = merge(sales, num_metadata, ['collection','token_id'], ensure=False)
-    df = merge(df, cat_metadata, ['collection','token_id'])
-    df[df.adj_nft_rank_0 == 'None']
-    df[df.adj_nft_rank_0 == 'None'][['collection','token_id','nft_rank','adj_nft_rank_0']]
-    df.adj_nft_rank_0.unique()
-    for c in num_features:
-        df[c].unique()
-        df[df.nft_rank == 'None']
-        df[df[c] == 'None'][[ 'nft_rank' ]]
-        df[c] = df[c].apply(lambda x: just_float(x))
-    df.sort_values('price', ascending=0)[['price']].head(20)
-    # df.groupby(['rarity','weight']).price.mean()
+        for c in model_exclude:
+            if c in dummies.columns:
+                del dummies[c]
+        pred_cols = num_features + list(dummies.columns)
+        pred_cols = [ c for c in pred_cols if not c in model_exclude ]
-    # create target cols
-    target_col = 'adj_price'
-    df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 )
-    df = df[df[target_col].notnull()]
-    df['log_price'] = df[target_col].apply(lambda x: np.log(x) )
-    df['rel_price_0'] = df[target_col] - df.mn_20
-    df['rel_price_1'] = df[target_col] / df.mn_20
-    df = df[df.mn_20 > 0]
-    df['log_mn_20'] = np.log(df.mn_20)
-    print('Training on {} sales'.format(len(df)))
-    df = standardize_df(df, pred_cols)
-
-    std_pred_cols_0 = [ 'std_{}'.format(c) for c in pred_cols ]
-    std_pred_cols = [ 'std_{}'.format(c) for c in pred_cols ]
-
-    #########################
-    # Run the Model #
-    #########################
-    tmp = df[std_pred_cols].count().reset_index()
-    tmp.columns = ['a','b']
-    tmp.sort_values('b').head(20)
-    rem = list(tmp[tmp.b==0].a.values)
-    std_pred_cols = [ c for c in std_pred_cols if not c in rem ]
-    if collection == 'Levana Dragon Eggs':
-        std_pred_cols = [ 'std_essence_Dark','std_collection_rank_group_0','std_rarity_Legendary','std_rarity_Rare','std_rarity_Ancient','std_collection_rank','std_transformed_collection_rank' ]
-    mn = df.timestamp.min()
-    mx = df.timestamp.max()
-    df['wt'] = df.timestamp.apply(lambda x: 3.0 ** ((x - mn) / (mx - mn)) )
-    if collection == 'Levana Dragon Eggs':
-        df['wt'] = 1
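The sale weights just above (`wt = 3.0 ** ((x - mn) / (mx - mn))`) scale smoothly from 1 for the oldest sale to 3 for the newest, so recent sales count up to three times as much in the weighted regressions. A quick standalone illustration:

    import numpy as np

    timestamps = np.array([0.0, 0.25, 0.5, 0.75, 1.0])  # already scaled to [0, 1]
    weights = 3.0 ** timestamps
    print(weights)  # [1.    1.316 1.732 2.28  3.   ]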
Count_1','std_Hat_Protagonist Black Hat','std_Clothes_Sailor Vest','std_Mouth_Pipe','std_Hat_Protagonist White Hat','std_Clothes_Pirate Vest','std_Hat_Roman Helmet','std_Type_Solana','std_Clothes_Beige Smoking','std_Hat_Military Helmet','std_Hat_White Fedora 1','std_naked_1_att','std_Type_Zombie','std_Clothes_Roman Armor','std_Eyes_3D Glasses','std_Clothes_Orange Kimono','std_Hat_Green Punk Hair','std_Hat_Sombrero','std_Clothes_Military Vest','std_Hat_Space Warrior Hair','std_Hat_Blue Punk Hair','std_Clothes_Orange Jacket','std_Ears_Earing Silver','std_Eyes_Laser Eyes','std_Eyes_Vipers','std_Type_Alien','std_Type_Red','std_Hat_Admiral Hat' ] - cur_std_pred_cols = [ 'std_adj_nft_rank_0','std_Hat_Crown','std_adj_nft_rank_1','std_Type_Skeleton','std_Type_Alien','std_Clothes_None','std_Eyes_Vipers','std_Hat_Space Warrior Hair','std_Type_Zombie','std_Clothes_Pirate Vest','std_Clothes_Orange Kimono','std_Eyes_Laser Eyes','std_Type_Solana','std_Hat_Ninja Bandana','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Attribute Count_0','std_Attribute Count_1','std_Attribute Count_2','std_Attribute Count_3','std_Attribute Count_5','std_Hat_Strawhat','std_Hat_Admiral Hat','std_matching_top','std_Hat_Sombrero','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_None' ] - cur_std_pred_cols = deepcopy(std_pred_cols) - g = df[std_pred_cols].sum().reset_index() - g.columns = [ 'col','cnt' ] - g = g.sort_values('cnt') - g.head(20) - if collection == 'Solana Monkey Busines': - df.loc[ df.token_id == '903', 'nft_rank' ] = 18 - df[df.token_id=='903'] - df[df.token_id==903] - X = df[cur_std_pred_cols].values - y_0 = df.rel_price_0.values - y_1 = df.rel_price_1.values - # df['tmp'] = df.collection_rank.apply(lambda x: int((8888 - x)/1000) ) - # g = df.groupby('tmp').rel_price_0.mean().reset_index() - # g['g'] = g.tmp.apply(lambda x: (((1.42**(x**1.42)) - 1) / 20) + 0.13 ) - # g['g'] = g.tmp.apply(lambda x: 2**x ) - # g - - # run the linear model - # clf_lin = Lasso(alpha=1.0) if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)]) - - # clf_lin = Ridge(alpha=1000) - # clf_lin = Ridge(alpha=100) - # clf_lin.fit(X, y_0, df.wt.values) - # clf_las = Lasso(alpha=1.5) - # clf_las.fit(X, y_0, df.wt.values) - # clf_rfr = RandomForestRegressor() - # clf_rfr.fit(X, y_0) - # clf_rfr.feature_importances_ - # imp = [] - # for a, b, c, d in zip(cur_std_pred_cols, clf_rfr.feature_importances_, clf_lin.coef_, clf_las.coef_): - # imp += [[a, b, abs(c), abs(d)]] - # imp = pd.DataFrame(imp, columns=['col','imp','lin','las']).sort_values('imp', ascending=0) - # imp['imp_rk'] = imp.imp.rank(ascending=0) - # imp['lin_rk'] = imp.lin.rank(ascending=0) - # imp['las_rk'] = imp.las.rank(ascending=0) - # imp['include'] = 0 - # imp.to_csv('~/Downloads/coef.csv', index=False) - # imp.head(50).tail(20) - # imp.head(40).tail(10) - # imp.head(50).tail(10) - # nft_rank should be negative - # adj_nft_rank_0 should be positive - # adj_nft_rank_1 should be positive - clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) - clf_lin = Ridge(alpha=30, fit_intercept=True) - clf_lin = Lasso(alpha=.225) - def get_coefs(cols, coef): - coefs = [] - for a, b in zip(cols, coef): - coefs += [[a,b]] - coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0) - # coefs.to_csv('~/Downloads/{}_lin_coefs.csv'.format(collection), index=False) - coefs['tmp'] = coefs.col.apply(lambda x: 'nft_rank' in x ) - # coefs['mult'] = coefs.col.apply(lambda x: -1 if x == 'std_nft_rank' else 1 ) - 
coefs['mult'] = coefs.apply(lambda x: -1 if x['col'] == 'std_nft_rank' else 1 if x['coef'] >= 0 else -1 , 1 ) - coefs['val'] = coefs.mult * coefs.coef - coefs = coefs.sort_values('val', ascending=0) - return(coefs) - - mn = -1 - print('Starting with {} cols'.format(len(cur_std_pred_cols))) - while mn < 0 or len(cur_std_pred_cols) > 140: - X = df[cur_std_pred_cols].values - clf_lin.fit(X, y_0, df.wt.values) - coefs = get_coefs(cur_std_pred_cols, clf_lin.coef_) - tmp = coefs[coefs.tmp == True] - mn = min(coefs.val) if len(coefs) else 0 - if mn < 0: - cur_std_pred_cols.remove(coefs.col.values[-1]) - else: - cur_std_pred_cols.remove(coefs.col.values[-1]) - coefs.to_csv('~/Downloads/{}_lin_coefs.csv'.format(collection), index=False) - len(coefs[coefs.coef !=0]) - # print(coefs[coefs.coef !=0]) - # print(len(coefs[coefs.coef !=0])) - INCLUDE_COLS = MODEL_INCLUDE_COLS[collection] if collection in MODEL_INCLUDE_COLS.keys() else [] - - # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) - - cur_std_pred_cols = list(coefs[coefs.coef !=0].col.unique()) - for c in INCLUDE_COLS: - if not c in cur_std_pred_cols: - cur_std_pred_cols.append(c) - lin_std_pred_cols = cur_std_pred_cols - X = df[cur_std_pred_cols].values - # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) - # clf_lin = Lasso(alpha=0.1) - clf_lin = Lasso(alpha=.1) - clf_lin.fit(X, y_0, df.wt.values) - coefs = get_coefs(cur_std_pred_cols, clf_lin.coef_) - print(coefs[coefs.coef !=0]) - print(len(coefs[coefs.coef !=0])) - print(coefs[coefs.col.isin(INCLUDE_COLS)]) - coefs[coefs.coef !=0].to_csv('./data/coefs/{}_lin_coefs.csv'.format(collection), index=False) - df[df['std_Attribute Count_0']!=0] - df['std_Attribute Count_0'].unique() - coefs[coefs.col.isin(INCLUDE_COLS)] - df['pred'] = clf_lin.predict(X) - df['err'] = df.pred - df.rel_price_0 - df[df['std_Hat_Space Warrior Hair'] == 1][['pred',target_col]].mean() - df[df['std_Hat_Space Warrior Hair'] == 1].err.median() - tmp = [] - for c in std_pred_cols: - if len(df[df[c] == 1]): - mu = round(df[df[c] == 1].err.mean()) - md = round(df[df[c] == 1].err.median()) - n = len(df[df[c] == 1]) - tmp += [[ c, int(c in cur_std_pred_cols ), n, mu, md ]] - # print('{}: {}, {}, {}'.format(c, mu, md, n)) - tmp = pd.DataFrame(tmp, columns=['c','i','n','mu','md']).sort_values('mu') - tmp.to_csv('~/Downloads/tmp4.csv', index=False) - tmp[tmp.i == 0].head(8) - tmp[tmp.i == 0].tail(8) - 'std_Hat_Crown','std_Attribute Count_0','std_Hat_Space Warrior Hair','std_Eyes_Laser Eyes','std_Type_Solana','' - df[df['std_Hat_Space Warrior Hair'] == 1].err.mean() - df[df['std_Hat_Strawhat'] == 1][['pred','rel_price_0']].mean() - - df['pred_lin'] = clf_lin.predict(X) - df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20 - df['err_lin'] = abs(((df.pred_lin - df[target_col]) / df[target_col]) ) - # df[df.genus_Titan==1][['rarity']] - # df[(df.rarity=='Legendary') | (df.genus=='Titan')][['genus','rarity']] - - # run the log model - # clf_log = Lasso(1.0) if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)]) - clf_log = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) - clf_log = Ridge(alpha=30) - clf_log = Lasso(0.003) - # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) - - mn = -1 - cur_std_pred_cols = deepcopy(std_pred_cols) - while mn < 0 or len(cur_std_pred_cols) > 140: - X = df[cur_std_pred_cols].values - clf_log.fit(X, y_1, df.wt.values) - coefs = get_coefs(cur_std_pred_cols, clf_log.coef_) - tmp = coefs[coefs.tmp == True] - mn = min(tmp.coef) if 
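Both the 'lin' pass above and the 'log' pass below rely on the same backward elimination: fit, rank coefficients by the sign-adjusted value from `get_coefs` (rank columns are expected to hurt price, trait dummies to help), and drop the worst column until nothing is wrong-signed. A simplified sketch of that loop, assuming `X_df` is a pandas DataFrame of standardized features:

    import numpy as np
    from sklearn.linear_model import Ridge

    def prune_wrong_signed(X_df, y, cols, must_be_negative=('std_nft_rank',)):
        """Refit and drop the most wrong-signed column until all signs look right."""
        clf = Ridge(alpha=1.0)
        cols = list(cols)
        while cols:
            clf.fit(X_df[cols].values, y)
            # sign-adjusted value: flip columns expected to be negative
            vals = [(-c if col in must_be_negative else c)
                    for col, c in zip(cols, clf.coef_)]
            worst = int(np.argmin(vals))
            if vals[worst] >= 0:
                break  # every coefficient has the expected sign
            cols.pop(worst)
        return cols, clf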
-    # run the log model
-    # clf_log = Lasso(1.0) if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)])
-    clf_log = RidgeCV(alphas=[1.5**x for x in range(1, 20)])
-    clf_log = Ridge(alpha=30)
-    clf_log = Lasso(0.003)
-    # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)])
-
-    mn = -1
-    cur_std_pred_cols = deepcopy(std_pred_cols)
-    while mn < 0 or len(cur_std_pred_cols) > 140:
-        X = df[cur_std_pred_cols].values
-        clf_log.fit(X, y_1, df.wt.values)
-        coefs = get_coefs(cur_std_pred_cols, clf_log.coef_)
-        tmp = coefs[coefs.tmp == True]
-        mn = min(tmp.coef) if len(tmp) else 0
-        if mn < 0:
-            cur_std_pred_cols.remove(tmp.col.values[-1])
-        else:
-            cur_std_pred_cols.remove(coefs.col.values[-1])
-    coefs = get_coefs(cur_std_pred_cols, clf_log.coef_)
-    coefs[coefs.coef !=0].to_csv('./data/coefs/{}_log_coefs.csv'.format(collection), index=False)
-    # print(coefs[coefs.coef !=0])
-    len(coefs[coefs.coef !=0])
-    # cur_std_pred_cols = list(coefs[coefs.coef !=0].col.unique())
-    for c in INCLUDE_COLS:
-        if not c in cur_std_pred_cols:
-            cur_std_pred_cols.append(c)
-    log_std_pred_cols = cur_std_pred_cols
-    X = df[cur_std_pred_cols].values
-    clf_log = Lasso(0.001)
-    clf_log.fit(X, y_1, df.wt.values)
-    coefs = get_coefs(cur_std_pred_cols, clf_log.coef_)
-    print(coefs[coefs.coef !=0])
-    print(len(coefs[coefs.coef !=0]))
-    print(coefs[coefs.col.isin(INCLUDE_COLS)])
-    # clf_log.fit(X, y_1, df.wt.values)
-    # if collection == 'Levana Dragon Eggs':
-    #     coefs = []
-    #     for a, b in zip(std_pred_cols, clf_lin.coef_):
-    #         coefs += [[a,b]]
-    #     coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
-    #     coefs.to_csv('~/Downloads/levana_log_coefs.csv', index=False)
-    df['pred_log'] = clf_log.predict(X)
-    df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
-    df['err_log'] = abs(((df.pred_log - df[target_col]) / df[target_col]) )
-    df[[ target_col,'pred_log','err_log','mn_20' ]].sort_values('err_log').tail(50)
-    df['err'] = df.err_lin * df.err_log
+        # create training df
+        sales['token_id'] = sales.token_id.astype(str)
+        num_metadata['token_id'] = num_metadata.token_id.astype(str)
+        df = merge(sales, num_metadata, ['collection','token_id'], ensure=False)
+        df = merge(df, cat_metadata, ['collection','token_id'], ensure=False)
+        for c in num_features:
+            df[c] = df[c].apply(lambda x: just_float(x))
-    # combine the models
-    clf = LinearRegression(fit_intercept=False)
-    clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values )
-    df[['pred_lin','pred_log',target_col]].mean()
-    print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) ))
-    l = df.sort_values('block_timestamp', ascending=0).mn_20.values[0]
-    tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], l]], columns=['collection','lin_coef','log_coef','floor_price'])
-    if clf.coef_[0] < 0:
-        print('Only using log')
-        df['pred'] = df.pred_log
-        tmp['lin_coef'] = 0
-        tmp['log_coef'] = 1
-    elif clf.coef_[1] < 0:
-        print('Only using lin')
-        df['pred'] = df.pred_lin
-        tmp['lin_coef'] = 1
-        tmp['log_coef'] = 0
-    else:
-        print('Only using BOTH!')
-        df['pred'] = clf.predict( df[['pred_lin','pred_log']].values )
-    coefsdf = coefsdf.append(tmp)
-    df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) )
+        #################################
+        # Create Test DataFrame #
+        #################################
+        # test = merge(num_metadata, cat_metadata, ['collection','token_id'], ensure=True, how='left')
+        ensure = not collection in ['Aurory']
+        # test = merge(num_metadata, cat_metadata, ['collection','token_id'], ensure=ensure)
+        test = merge(num_metadata, cat_metadata, ['collection','token_id'], ensure=False)
+        for c in num_features:
+            test[c] = test[c].apply(lambda x: just_float(x) )
+        tail = df.sort_values('timestamp').tail(1)
+        test.loc[ test.token_id == '903', 'nft_rank' ] = 18
+        for c in [ 'std_timestamp','mn_20','log_mn_20' ]:
+            if c in tail.columns:
+                test[c] = tail[c].values[0]
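The two training targets used throughout are both floor-relative: `rel_price_0 = adj_price - mn_20` (a premium in SOL over the rolling floor, fed to the 'lin' model) and `rel_price_1 = adj_price / mn_20` (a floor multiple, fed to the 'log' model). Predictions are mapped back to prices later by adding or multiplying the current floor. A tiny numeric check:

    price, floor = 150.0, 100.0
    rel_0 = price - floor      # 50.0 SOL premium over the floor
    rel_1 = price / floor      # 1.5x the floor
    # recovering the price from each representation
    assert floor + rel_0 == price
    assert floor * rel_1 == price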
-    # print out some summary stats
-    df['err'] = df[target_col] - df.pred
-    df['q'] = (df.pred.rank() ** 1.5 * .2) / len(df)
-    df['q'] = df.q.apply(lambda x: int(round(x)) )
-    df['pct_err'] = (df[target_col] / df.pred) - 1
-    pe_mu = df.pct_err.mean()
-    pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) ].pct_err.std()
-    pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) & (df.days_ago<=50) ].pct_err.std()
-    df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) )
-    df['pred_sd'] = df.pred * pe_sd
-    # print(df.groupby('q')[['err','pred',target_col]].mean())
-    print(df[df.wt >= df.wt.median()].groupby('q')[['err','pred',target_col]].mean())
-    print(df.groupby('q')[['err','pred',target_col]].mean())
-    # df.err.mean()
-    # df[df.weight >= 3.5].err.mean()
-    df[df.pred < 200].err.mean()
-    df['collection'] = collection
-    print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2)))
-    salesdf = salesdf.append( df.merge(s_df[s_df.sim == 0][['collection','token_id','block_timestamp','price']] )[[ 'collection','token_id','block_timestamp','price','pred','mn_20','nft_rank' ]].sort_values('block_timestamp', ascending=0) )
+        for tmp in [df, test]:
+            for i in [100, 250, 1000]:
+                tmp['is_top_{}'.format(i)] = (tmp.nft_rank <= i).astype(int)
+        pred_cols += [ 'is_top_100','is_top_250','is_top_1000' ]
+        df.sort_values('price', ascending=0)[['price']].head(20)
+        # df.groupby(['rarity','weight']).price.mean()
+
+        # create target cols
+        target_col = 'adj_price'
+        df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 )
+        df['mn_20'] = df.mn_20 * 1.01
+        df = df[df[target_col].notnull()]
+        df['log_price'] = df[target_col].apply(lambda x: np.log(x) )
+        df['rel_price_0'] = df[target_col] - df.mn_20
+        df['rel_price_1'] = df[target_col] / df.mn_20
+        df = df[df.mn_20 > 0]
+        df['log_mn_20'] = np.log(df.mn_20)
+        print('Training on {} sales'.format(len(df)))
+        df = standardize_df(df, pred_cols)
+        test = standardize_df(test, pred_cols, df)
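Note that `standardize_df(test, pred_cols, df)` passes the training frame as `usedf`, so the test features are z-scored with the training mean/sd rather than their own (binary 0/1 dummies pass through unchanged; see the helper's definition in solana_model_old.py further below). Roughly, for the non-binary branch:

    def standardize_like(test, train, cols):
        # assumption: mirrors standardize_df's behavior for non-binary columns
        for c in cols:
            mu, sd = train[c].mean(), train[c].std()
            test['std_{}'.format(c)] = (test[c] - mu) / sd
        return test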
+
+        std_pred_cols_0 = [ 'std_{}'.format(c) for c in pred_cols ]
+        std_pred_cols = [ 'std_{}'.format(c) for c in pred_cols ]
+
+        #########################
+        # Run the Model #
+        #########################
+        tmp = df[std_pred_cols].count().reset_index()
+        tmp.columns = ['a','b']
+        tmp.sort_values('b').head(20)
+        rem = list(tmp[tmp.b==0].a.values)
+        std_pred_cols = [ c for c in std_pred_cols if not c in rem ]
+        if collection == 'Levana Dragon Eggs':
+            std_pred_cols = [ 'std_essence_Dark','std_collection_rank_group_0','std_rarity_Legendary','std_rarity_Rare','std_rarity_Ancient','std_collection_rank','std_transformed_collection_rank' ]
+        mn = df.timestamp.min()
+        mx = df.timestamp.max()
+        df['wt'] = df.timestamp.apply(lambda x: 3.0 ** ((x - mn) / (mx - mn)) )
+        df.loc[ (df.collection == 'Aurory') & (df.block_timestamp <= '2021-09-05'), 'wt' ] = 0.05
+        if collection == 'Levana Dragon Eggs':
+            df['wt'] = 1
+        # df['wt'] = df.price.apply(lambda x: 1.0 / (x ** 0.9) )
+        # df.sort_values('price', ascending=0)[['price','wt']].head(20)
+        # std_pred_cols = [ 'std_Hat_Crown','std_adj_nft_rank_0','std_Hat_None','std_Eyes_None','std_Clothes_None','std_Attribute Count_4','std_Mouth_None','std_adj_nft_rank_1','std_Type_Dark','std_Ears_None','std_Background_Light purple','std_Hat_Black Fedora 2','std_Hat_White Fedora 2','std_Attribute Count_0','std_Type_Skeleton','std_Attribute Count_2','std_Attribute Count_1','std_Hat_Protagonist Black Hat','std_Clothes_Sailor Vest','std_Mouth_Pipe','std_Hat_Protagonist White Hat','std_Clothes_Pirate Vest','std_Hat_Roman Helmet','std_Type_Solana','std_Clothes_Beige Smoking','std_Hat_Military Helmet','std_Hat_White Fedora 1','std_naked_1_att','std_Type_Zombie','std_Clothes_Roman Armor','std_Eyes_3D Glasses','std_Clothes_Orange Kimono','std_Hat_Green Punk Hair','std_Hat_Sombrero','std_Clothes_Military Vest','std_Hat_Space Warrior Hair','std_Hat_Blue Punk Hair','std_Clothes_Orange Jacket','std_Ears_Earing Silver','std_Eyes_Laser Eyes','std_Eyes_Vipers','std_Type_Alien','std_Type_Red','std_Hat_Admiral Hat' ]
+        cur_std_pred_cols = [ 'std_adj_nft_rank_0','std_Hat_Crown','std_adj_nft_rank_1','std_Type_Skeleton','std_Type_Alien','std_Clothes_None','std_Eyes_Vipers','std_Hat_Space Warrior Hair','std_Type_Zombie','std_Clothes_Pirate Vest','std_Clothes_Orange Kimono','std_Eyes_Laser Eyes','std_Type_Solana','std_Hat_Ninja Bandana','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Attribute Count_0','std_Attribute Count_1','std_Attribute Count_2','std_Attribute Count_3','std_Attribute Count_5','std_Hat_Strawhat','std_Hat_Admiral Hat','std_matching_top','std_Hat_Sombrero','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_None' ]
+        cur_std_pred_cols = deepcopy(std_pred_cols)
+        g = df[std_pred_cols].sum().reset_index()
+        g.columns = [ 'col','cnt' ]
+        g = g.sort_values('cnt')
+        g.head(20)
+        if collection == 'Solana Monkey Business':
+            df.loc[ df.token_id == '903', 'nft_rank' ] = 18
+        df[df.token_id=='903']
+        df[df.token_id==903]
+        df = df.reset_index(drop=True)
+        X = df[cur_std_pred_cols].values
+        y_0 = df.rel_price_0.values
+        y_1 = df.rel_price_1.values
+
+        # CUR_FLOOR = df.sort_values('block_timestamp', ascending=0).mn_20.values[0]
+        CUR_FLOOR = listings[(listings.collection == collection) & (listings.price.notnull())].price.min()
+        print('CUR_FLOOR = {}'.format(CUR_FLOOR))
+
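The loop that follows is a small stacking setup: for each floor-relative target, several base models ('las', 'ridge', 'rfr', fit via the project-specific `ku.get_bst_params` / `ku.apply_model` helpers, which are not shown in this diff) each produce a `y_pred_<model>_<it>` column, and a final `LinearRegression` blends those columns, dropping any base model that receives a negative blend weight. A schematic of the idea using plain sklearn in place of the `ku.*` helpers (in-sample blending, as in the script):

    import numpy as np
    from sklearn.linear_model import Lasso, Ridge, LinearRegression
    from sklearn.ensemble import RandomForestRegressor

    def stack_predictions(X, y):
        base = {'las': Lasso(alpha=0.1), 'ridge': Ridge(alpha=1.0),
                'rfr': RandomForestRegressor(n_estimators=100)}
        preds = []
        for name, clf in base.items():
            clf.fit(X, y)
            preds.append(clf.predict(X))    # one y_pred_<model> column each
        P = np.column_stack(preds)
        blender = LinearRegression().fit(P, y)  # blend weights over base models
        return blender.predict(P)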
+        for target_col in [ 'rel_price_0', 'rel_price_1' ]:
+            it = target_col[-1]
+            y_val = df[target_col].values
+            print('target_col = {}'.format(target_col))
+            mn = -1
+            cols = [ 'std_nft_rank','std_adj_nft_rank_0','std_adj_nft_rank_1','std_adj_nft_rank_2' ]
+            clf = Ridge(alpha = 1)
+            # while mn < 0 and len(cols):
+            #     clf.fit(df[cols].values, y_val, df.wt.values)
+            #     coefs = get_coefs(cols, clf.coef_)
+            #     mn = min(coefs.val) if len(coefs) else 0
+            #     if mn < 0:
+            #         cols.remove(coefs.col.values[-1])
+
+            col = 'rarity_value_'+it
+            model = 'ridge'
+            df[col] = 0
+            test[col] = 0
+            # df, bst_p, bst_r = ku.get_bst_params( model, df, df[cols].values, y_val, target_col, col, verbose = True, wt_col='wt' )
+            # test = ku.apply_model( model, bst_p, df, test, cols, target_col, col)
+
+            # df['rarity_value_'+it] = clf.predict(df[cols].values)
+            rar_adj_target_col = 'rar_adj_'+target_col
+            df[rar_adj_target_col] = df[target_col] - df['rarity_value_'+it]
+            # test[rar_adj_target_col] = test[target_col] - test['rarity_value_'+it]
+            y_val_rar_adj = df[rar_adj_target_col].values
+            models = ['las','ridge'] if target_col == 'rel_price_1' else ['las','ridge','rfr']
+            for model in models:
+                cur_std_pred_cols = std_pred_cols
+                print(model)
+                y = y_val_rar_adj if model in ['rfr'] else y_val
+                col = 'y_pred_{}_{}'.format(model, it)
+                df, bst_p, bst_r = ku.get_bst_params( model, df, X, y, target_col, col, verbose = True, wt_col='wt' )
+
+                # if model == 'ridge':
+                #     while len(cur_std_pred_cols) > 50:
+                #         coefs = get_coefs(cur_std_pred_cols, clf.coef_)
+                #         cur_std_pred_cols.remove(coefs.col.values[-1])
+                #         new_X = df[cur_std_pred_cols].values
+                #         clf = ku.get_model(model, bst_p)
+                #         clf.fit(new_X, y)
+                #     # coefs.to_csv('./data/coefs/{}_{}_{}.csv'.format(collection, model, it))
+                #     new_X = df[cur_std_pred_cols].values
+                #     df, bst_p, bst_r = ku.get_bst_params( model, df, new_X, y, target_col, col, verbose = True, wt_col='wt' )
+
+                if model in ['las','ridge']:
+                    clf = ku.get_model(model, bst_p)
+                    clf.fit(X, y)
+                    coefs = get_coefs(cur_std_pred_cols, clf.coef_)
+                    mn = coefs.val.min()
+                    while mn < 0:
+                        cur_std_pred_cols = [ c for c in coefs[coefs.val >= 0 ].col.unique() ]
+                        X_new = df[cur_std_pred_cols].values
+                        clf.fit(X_new, y)
+                        # df, bst_p, bst_r = ku.get_bst_params( model, df, df[cur_std_pred_cols].values, y, target_col, col, verbose = True, wt_col='wt' )
+                        coefs = get_coefs(cur_std_pred_cols, clf.coef_)
+                        mn = coefs.val.min()
+                    coefs.to_csv('./data/coefs/{}_{}_{}.csv'.format(collection, model, it), index=False)
+                    test = ku.apply_model( model, bst_p, df, test, cur_std_pred_cols, target_col, col)
+                if model in ['rfr']:
+                    df[col] = df[col] + df['rarity_value_'+it]
+                    test[col] = test[col] + test['rarity_value_'+it]
+
+            mn = -1
+            cols = [ c for c in df.columns if c[:7] == 'y_pred_' and c[-1] == it ]
+            clf = LinearRegression()
+            df[cols].mean()
+            df[cols].median()
+            test[cols].mean()
+            test[cols].median()
+            while mn < 0 and len(cols):
+                clf.fit(df[cols].values, df[target_col].values)
+                coefs = get_coefs(cols, clf.coef_)
+                mn = min(coefs.val) if len(coefs) else 0
+                if mn < 0:
+                    cols.remove(coefs.col.values[-1])
+                else:
+                    print(coefs)
+            if it == '0':
+                df['pred_lin'] = clf.predict(df[cols].values) + df.mn_20
+                test['pred_lin'] = clf.predict(test[cols].values) + CUR_FLOOR
+                # df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20
+            else:
+                df['pred_log'] = clf.predict(df[cols].values)
+                df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
+                test['pred_log'] = clf.predict(test[cols].values)
+                test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * CUR_FLOOR
+
+        clf = LinearRegression(fit_intercept=False)
+        target_col = 'adj_price'
+        clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values )
+        clf.score( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values )
+        df[['pred_lin','pred_log',target_col]].mean()
+        df[['pred_lin','pred_log',target_col]].median()
+        test[['pred_lin','pred_log']].mean()
+        test[['pred_lin','pred_log']].median()
+
+        print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) ))
+        tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], CUR_FLOOR]], columns=['collection','lin_coef','log_coef','floor_price'])
+        if clf.coef_[0] < 0:
+            print('Only using log')
+            df['pred'] = df.pred_log
+            test['pred'] = test.pred_log
+            tmp['lin_coef'] = 0
+            tmp['log_coef'] = 1
+        elif clf.coef_[1] < 0:
+            print('Only using lin')
+            df['pred'] = df.pred_lin
+            test['pred'] = test.pred_lin
+            tmp['lin_coef'] = 1
+            tmp['log_coef'] = 0
+        else:
+            print('Only using BOTH!')
+            df['pred'] = clf.predict( df[['pred_lin','pred_log']].values )
+            test['pred'] = clf.predict( test[['pred_lin','pred_log']].values )
+        coefsdf = coefsdf.append(tmp)
+        df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) )
+
+        # print out some summary stats
+        df['err'] = df[target_col] - df.pred
+        df['q'] = (df.pred.rank() ** 1.5 * .2) / len(df)
+        df['q'] = df.q.apply(lambda x: int(round(x)) )
+        df['pct_err'] = (df[target_col] / df.pred) - 1
+        pe_mu = df.pct_err.mean()
+        pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) & (df.days_ago<=50) ].pct_err.std()
+        if pe_sd != pe_sd:  # NaN check: no recent in-range sales, so fall back to the unfiltered std
+            pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) ].pct_err.std()
+        df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) )
+        df['pred_sd'] = df.pred * pe_sd
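`pe_sd != pe_sd` is the idiomatic NaN test; the resulting `pred_sd` is a per-token uncertainty band proportional to the prediction. A standalone illustration of the same computation:

    import numpy as np

    pct_err = np.array([0.1, -0.2, 0.05, 2.4])        # (price / pred) - 1
    recent  = np.array([True, True, False, True])
    inlier  = (pct_err > -0.9) & (pct_err < 0.9)
    pe_sd = pct_err[inlier & recent].std()
    if pe_sd != pe_sd:        # True only for NaN, i.e. no recent in-range sales
        pe_sd = pct_err[inlier].std()
    pred = 120.0
    pred_sd = pred * pe_sd    # uncertainty band scales with the prediction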
+        # print(df.groupby('q')[['err','pred',target_col]].mean())
+        print(df[df.wt >= df.wt.median()].groupby('q')[['err','pred',target_col]].mean())
+        print(df.groupby('q')[['err','pred',target_col]].mean())
+        # df.err.mean()
+        # df[df.weight >= 3.5].err.mean()
+        df[df.pred < 200].err.mean()
+        df['collection'] = collection
+        print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2)))
+        salesdf = salesdf.append( df.merge(s_df[s_df.sim == 0][['collection','token_id','block_timestamp','price']] )[[ 'collection','token_id','block_timestamp','price','pred','mn_20','nft_rank' ]].sort_values('block_timestamp', ascending=0) )
+
+        ############################################################
+        # Create Predictions for Each NFT in The Collection #
+        ############################################################
+        # test = merge(num_metadata, cat_metadata, ['collection','token_id'])
+        # for c in num_features:
+        #     test[c] = test[c].apply(lambda x: just_float(x) )
+        # tail = df.sort_values('timestamp').tail(1)
+        # test.loc[ test.token_id == '903', 'nft_rank' ] = 18
+        # for c in [ 'std_timestamp','mn_20','log_mn_20' ]:
+        #     if c in tail.columns:
+        #         test[c] = tail[c].values[0]
+        # test = standardize_df(test, pred_cols, df)
+
+        # test['pred_lin'] = clf_lin.predict(test[lin_std_pred_cols].values)
+        # test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l)
+        # test['pred_log'] = clf_log.predict(test[log_std_pred_cols].values)
+        # test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l
+
+        # test['pred_price'] = test.pred.apply(lambda x: x if x < 400 else (x-400)**0.96 + 400 )
+        def f(p):
+            c = CUR_FLOOR * 2.5
+            return( p if p <= c else c+((p-c) ** 0.95) )
+        test['pred_price'] = test.pred.apply(lambda x: f(x) )
+        len(test[test.pred <= CUR_FLOOR * 1.01])
+        len(test[test.pred <= CUR_FLOOR * 1.02])
+        if not check_exclude:
+            test['pred_price'] = test.pred_price.apply(lambda x: (x*0.985) )
+        len(test[test.pred_price <= CUR_FLOOR])
+        test['pred_sd'] = test.pred_price * pe_sd
+        test = test.sort_values(['collection','token_id'])
+        test['rk'] = test.pred_price.rank(ascending=0, method='first')
+        test['collection'] = collection
+        pred_price = pred_price.append( test[[ 'collection','token_id','nft_rank','rk','pred_price','pred_sd' ]].sort_values('pred_price') ).drop_duplicates(subset=['collection','token_id'], keep='last')
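`f` above soft-caps predictions: below 2.5x the current floor they pass through unchanged; above that, the excess is shrunk with a 0.95 power so the rarest tokens are not priced absurdly. For example, with a floor of 100:

    CUR_FLOOR = 100.0

    def f(p):
        c = CUR_FLOOR * 2.5
        return p if p <= c else c + ((p - c) ** 0.95)

    print(f(200))    # 200.0 (below the 250 cap, unchanged)
    print(f(1250))   # ~957.9 (the 1000 excess shrinks to 1000**0.95 ~= 707.9)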
-    ############################################################
-    # Create Predictions for Each NFT in The Collection #
-    ############################################################
-    test = merge(num_metadata, cat_metadata, ['collection','token_id'])
-    for c in num_features:
-        test[c] = test[c].apply(lambda x: just_float(x) )
-    tail = df.sort_values('timestamp').tail(1)
-    test.loc[ test.token_id == '903', 'nft_rank' ] = 18
-    test[test.token_id=='903']
-    for c in [ 'std_timestamp','mn_20','log_mn_20' ]:
-        if c in tail.columns:
-            test[c] = tail[c].values[0]
-    test = standardize_df(test, pred_cols, df)
+        imp = []
+        for c in dummies.columns:
+            md = test[test[c] == 1].pred_price.median()
+            md_0 = test.pred_price.quantile(0.475)
+            imp += [[ collection, c, md_0, md ]]
+        # imp = pd.DataFrame(imp, columns=['collection','feature_name',''])
+        imp = pd.DataFrame(imp, columns=['collection','col','col_md','md']).sort_values('md', ascending=0)
+        imp['pct_vs_baseline'] = ((imp.md / imp.col_md) - 1).apply(lambda x: max(0, x))
+        imp['feature_name'] = imp.col.apply(lambda x: re.split('_', x)[0] )
+        imp['feature_value'] = imp.col.apply(lambda x: re.split('_', x)[1] if '_' in x else None )
+        feature_values = feature_values.append(imp[['collection','feature_name','feature_value','pct_vs_baseline']])
-    test['pred_lin'] = clf_lin.predict(test[lin_std_pred_cols].values)
-    test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l)
-    test['pred_log'] = clf_log.predict(test[log_std_pred_cols].values)
-    test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l
+        cols = metadata.feature_name.unique()
+        cols = [ x for x in cols if not x in (ATT_EXCLUDE_COLS[collection] if collection in ATT_EXCLUDE_COLS.keys() else []) + ALL_NUMERIC_COLS ]
+        exclude = RARITY_EXCLUDE_COLS[collection] if collection in RARITY_EXCLUDE_COLS.keys() else []
+        for c in cols:
+            cur = metadata[metadata.feature_name == c][['collection','token_id','feature_name','feature_value']]
+            l = len(cur.token_id.unique())
+            if c in exclude:
+                cur['rarity'] = None
+            else:
+                g = cur.groupby('feature_value').token_id.count().reset_index()
+                g['rarity'] = g.token_id / l
+                cur = merge(cur, g[['feature_value','rarity']])
+            attributes = attributes.append(cur)
+
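Rarity here is simply the share of tokens carrying each trait value (lower means rarer); features on the exclude list get a null rarity. A compact equivalent of the loop above:

    import pandas as pd

    meta = pd.DataFrame({
        'token_id': ['1', '2', '3', '4'],
        'feature_name': 'Hat',
        'feature_value': ['Crown', 'Sombrero', 'Sombrero', 'Sombrero'],
    })
    counts = meta.groupby('feature_value').token_id.count()
    rarity = counts / meta.token_id.nunique()
    print(rarity)  # Crown 0.25, Sombrero 0.75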
+        attributes['feature_name'] = attributes.feature_name.apply(lambda x: re.sub('_', ' ', x).title().strip() )
+        attributes['feature_value'] = attributes.feature_value.apply(lambda x: str(x).strip() )
+        sorted(attributes['feature_name'].unique())
+        if len(feature_values):
+            feature_values['feature_name'] = feature_values.feature_name.apply(lambda x: re.sub('_', ' ', x).title() )
+        # feature_values = pd.read_csv('./data/feature_values.csv')
+        feature_values = feature_values.merge(attributes[['collection','feature_name']].drop_duplicates())
+        # n = feature_values[['collection', 'feature_name']].drop_duplicates().groupby(['collection']).feature_name.count().reset_index().rename(columns={'feature_name': 'n'})
+        # feature_values = feature_values.merge(n)
+        # feature_values['pct_vs_baseline'] = feature_values.pct_vs_baseline / feature_values.n
+        # del feature_values['n']
+        feature_values[ (feature_values.collection == 'Solana Monkey Business') & (feature_values.feature_name == 'Clothes') ]
+        feature_values[ (feature_values.collection == 'Solana Monkey Business') & (feature_values.feature_name == 'Clothes') & (feature_values.feature_value == 'Poncho') ]
+        attributes[ (attributes.collection == 'Solana Monkey Business') & (attributes.feature_name == 'Clothes') & (attributes.feature_value == 'Poncho') & (attributes.token_id == '1') ]
+        attributes[ (attributes.collection == 'Solana Monkey Business') & (attributes.feature_name == 'Clothes') & (attributes.feature_value == 'Poncho') & (attributes.token_id == 1) ]
-    test['pred_price'] = clf.predict( test[[ 'pred_lin','pred_log' ]].values )
-    if not check_exclude:
-        test['pred_price'] = test.pred_price.apply(lambda x: (x*0.985) )
-    test['pred_sd'] = test.pred_price * pe_sd
-    test = test.sort_values(['collection','token_id'])
-    test['rk'] = test.pred_price.rank(ascending=0, method='first')
-    test['collection'] = collection
-    pred_price = pred_price.append( test[[ 'collection','token_id','nft_rank','rk','pred_price','pred_sd' ]].sort_values('pred_price') )
+    coefsdf.to_csv('./data/coefsdf.csv', index=False)
+    salesdf.to_csv('./data/model_sales.csv', index=False)
+    old = pd.read_csv('./data/pred_price copy.csv')
+    old['token_id'] = old.token_id.astype(str)
+    old = pred_price.merge(old, on=['collection','token_id'])
+    old['ratio'] = old.pred_price_x / old.pred_price_y
+    old = old.sort_values('ratio')
+    old.columns = [ 'collection', 'token_id', 'nft_rank', 'rk_new', 'pred_price_new', 'pred_sd_x', 'rank', 'rk_old', 'pred_price_old', 'pred_sd_y', 'clean_token_id', 'ratio' ]
+    m = m_df[(m_df.collection.isin(pred_price.collection.unique())) & (-(m_df.feature_name.isin(['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'])))]
+    m_p = m.pivot(['collection','token_id'], ['feature_name'], ['feature_value']).reset_index()
+    m_p.columns = [ 'collection','token_id' ] + sorted(m.feature_name.unique())
+    m_p.head()
+    old = old.merge(m_p, on=['collection','token_id'])
+    old = old[[ 'token_id', 'nft_rank', 'rk_old', 'rk_new', 'pred_price_old', 'pred_price_new', 'ratio' ] + [c for c in m_p.columns if not c in ['token_id','collection']]]
+    old.to_csv('~/Downloads/tmp1.csv', index=False)
+    pred_price.head()
+    old[old.token_id == '4857']
+    old.head()
+    old.tail()
-    cols = metadata.feature_name.unique()
-    cols = [ x for x in cols if not x in (ATT_EXCLUDE_COLS[collection] if collection in ATT_EXCLUDE_COLS.keys() else []) + ALL_NUMERIC_COLS ]
-    exclude = RARITY_EXCLUDE_COLS[collection] if collection in RARITY_EXCLUDE_COLS.keys() else []
-    for c in cols:
-        cur = metadata[metadata.feature_name == c][['collection','token_id','feature_name','feature_value']]
-        l = len(cur.token_id.unique())
-        if c in exclude:
-            cur['rarity'] = None
-        else:
-            g = cur.groupby('feature_value').token_id.count().reset_index()
-            g['rarity'] = g.token_id / l
-            cur = merge(cur, g[['feature_value','rarity']])
-        attributes = attributes.append(cur)
+    # nft_rank = m_df[m_df.feature_name=='nft_rank'][['collection','token_id','feature_value']].rename(columns={'feature_value': 'nft_rank'})
+    # nft_rank['token_id'] = nft_rank.token_id.astype(str)
+    # pred_price['token_id'] = pred_price.token_id.astype(str)
+    # pred_price = pred_price.merge(nft_rank, how='left', on=['collection','token_id'])
+    pred_price.to_csv('./data/pred_price.csv', index=False)
+    # pred_price = pd.read_csv('./data/pred_price.csv')
+    pred_price.groupby('collection')[['pred_price']].min()
+    attributes.to_csv('./data/attributes.csv', index=False)
+    attributes = pd.read_csv('./data/attributes.csv')
+    attributes[attributes.rarity.isnull()]
+    feature_values.to_csv('./data/feature_values.csv', index=False)
-    attributes['feature_name'] = attributes.feature_name.apply(lambda x: re.sub('_', ' ', x).title() )
-    sorted(attributes['feature_name'].unique())
-    if len(feature_values):
-        feature_values['feature_name'] = feature_values.feature_name.apply(lambda x: re.sub('_', ' ', x).title() )
-
-coefsdf.to_csv('./data/coefsdf.csv', index=False)
-salesdf.to_csv('./data/model_sales.csv', index=False)
-old = pd.read_csv('./data/pred_price copy.csv')
-old['token_id'] = old.token_id.astype(str)
-old = pred_price.merge(old, on=['collection','token_id'])
-old['ratio'] = old.pred_price_x / old.pred_price_y
-old = old.sort_values('ratio')
-old.columns = [ 'collection', 'token_id', 'nft_rank', 'rk_new', 'pred_price_new', 'pred_sd_x', 'rank', 'rk_old', 'pred_price_old', 'pred_sd_y', 'clean_token_id', 'ratio' ]
-m = m_df[(m_df.collection.isin(pred_price.collection.unique())) & (-(m_df.feature_name.isin(['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'])))]
-m_p = m.pivot(['collection','token_id'], ['feature_name'], ['feature_value']).reset_index()
-m_p.columns = [ 'collection','token_id' ] + sorted(m.feature_name.unique())
-m_p.head()
-old = old.merge(m_p, on=['collection','token_id'])
-old = old[[ 'token_id', 'nft_rank', 'rk_old', 'rk_new', 'pred_price_old', 'pred_price_new', 'ratio' ] + [c for c in m_p.columns if not c in ['token_id','collection']]]
-old.to_csv('~/Downloads/tmp1.csv', index=False)
-pred_price.head()
-old[old.token_id == '4857']
-old.head()
-old.tail()
-
-pred_price.to_csv('./data/pred_price.csv', index=False)
-attributes.to_csv('./data/attributes.csv', index=False)
-attributes[attributes.rarity.isnull()]
-feature_values.to_csv('./data/feature_values.csv', index=False)
-
-# metadata = pd.read_csv('./data/metadata.csv')
-# metadata['collection'] = metadata.collection.apply(lambda x: clean_name(x))
-# metadata['token_id'] = metadata.token_id.astype(str)
-# metadata.head()
-# nft_rank = pred_price[[ 'collection','token_id','nft_rank' ]].rename(columns={'nft_rank':'feature_value'})
-# nft_rank['feature_name'] = 'nft_rank'
-# metadata = metadata[metadata.feature_name != 'nft_rank']
-# nft_rank = merge(nft_rank, metadata[['collection','chain']].fillna('Solana').drop_duplicates())
-# metadata = metadata.append(nft_rank)
-# metadata.to_csv('./data/metadata.csv', index=False)
+    # metadata = pd.read_csv('./data/metadata.csv')
+    # metadata['collection'] = metadata.collection.apply(lambda x: clean_name(x))
+    # metadata['token_id'] = metadata.token_id.astype(str)
+    # metadata.head()
+    # nft_rank = pred_price[[ 'collection','token_id','nft_rank' ]].rename(columns={'nft_rank':'feature_value'})
+    # nft_rank['feature_name'] = 'nft_rank'
+    # metadata = metadata[metadata.feature_name != 'nft_rank']
+    # nft_rank = merge(nft_rank, metadata[['collection','chain']].fillna('Solana').drop_duplicates())
+    # metadata = metadata.append(nft_rank)
+    # metadata.to_csv('./data/metadata.csv', index=False)
-feature_values.to_csv('./data/feature_values.csv', index=False)
+    feature_values.to_csv('./data/feature_values.csv', index=False)
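The exclusion pass that follows flags sales whose price and model prediction disagree by both a large absolute gap and a large ratio (applied symmetrically in both directions, then unioned), and persists the flagged rows to ./data/exclude.csv so later runs can drop them. A sketch of the rule for one direction:

    def is_outlier(price, pred):
        dff, rat = price - pred, price / pred
        return ((dff >= 20 and rat > 4) or (dff >= 40 and rat > 3)
                or (dff >= 60 and rat > 2.5) or (dff >= 80 and rat > 2))

    print(is_outlier(100, 20))   # True: 80 SOL over and 5x the prediction
    print(is_outlier(100, 90))   # False: gap and ratio both too small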
-if check_exclude:
-    salesdf['rat'] = salesdf.price / salesdf.pred
-    salesdf['dff'] = salesdf.price - salesdf.pred
-    salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int)
-    salesdf['rat'] = salesdf.pred / salesdf.price
-    salesdf['dff'] = salesdf.pred - salesdf.price
-    salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int)
-    salesdf['exclude'] = (salesdf.exclude_1 + salesdf.exclude_2).apply(lambda x: int(x>0))
-    print(salesdf.exclude_1.mean())
-    print(salesdf.exclude_2.mean())
-    print(salesdf.exclude.mean())
-    salesdf[salesdf.token_id == '2239'][['collection','price','exclude']]
-    salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False)
+    if True or check_exclude:
+        exclude = pd.read_csv('./data/exclude.csv')
+        salesdf['rat'] = salesdf.price / salesdf.pred
+        salesdf['dff'] = salesdf.price - salesdf.pred
+        salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2.5)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int)
+        salesdf['rat'] = salesdf.pred / salesdf.price
+        salesdf['dff'] = salesdf.pred - salesdf.price
+        salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2.5)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int)
+        salesdf['exclude'] = (salesdf.exclude_1 + salesdf.exclude_2).apply(lambda x: int(x>0))
+        print(salesdf.exclude_1.mean())
+        print(salesdf.exclude_2.mean())
+        print(salesdf.exclude.mean())
+        salesdf[salesdf.token_id == '2239'][['collection','price','exclude']]
+        exclude = exclude.append(salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]])
+        # salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False)
+        exclude.to_csv('./data/exclude.csv', index=False)
+
+# train_model(True, False)
+# train_model(False, False)
+train_model(False, True)
-train_model(True, False)
-train_model(False, True)
\ No newline at end of file
diff --git a/solana_model_old.py b/solana_model_old.py
index f4002140..a8d6aef0 100644
--- a/solana_model_old.py
+++ b/solana_model_old.py
@@ -1,3 +1,4 @@
+import collections
 import os
 import re
 import json
@@ -5,570 +6,679 @@ import warnings
 import requests
 import numpy as np
 import pandas as pd
+import kutils as ku
 import urllib.request
 import tensorflow as tf
 import snowflake.connector
+
+from curses import meta
+from copy import deepcopy
 from datetime import datetime
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.linear_model import LinearRegression, RidgeCV, Lasso
+from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
+from sklearn.linear_model import LinearRegression, RidgeCV, Lasso, Ridge
 from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV
+os.chdir('/Users/kellenblumberg/git/nft-deal-score')
+from scrape_sol_nfts import clean_name
+
 warnings.filterwarnings('ignore')
-os.chdir('/Users/kellenblumberg/git/nft-deal-score')
-
-CHECK_EXCLUDE = False
-CHECK_EXCLUDE = True
-
-# Using sales from howrare.is - the last sale that was under 300 was when the floor was at 72. Filtering for when the floor is >100, the lowest sale was 400

 ###################################
 # Define Helper Functions #
 ###################################
 def standardize_df(df, cols, usedf=None, verbose=False):
-    for c in cols:
-        if type(usedf) != type(pd.DataFrame()):
-            usedf = df
-        mu = usedf[c].mean()
-        sd = usedf[c].std()
-        if verbose:
-            print(c)
-        if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0:
-            df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 )
-        else:
-            df['std_{}'.format(c)] = (df[c] - mu) / sd
-    return(df)
+    for c in cols:
+        if type(usedf) != type(pd.DataFrame()):
+            usedf = df
+        mu = usedf[c].mean()
+        sd = usedf[c].std()
+        if verbose:
+            print(c)
+        if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0:
+            # df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 )
+            df['std_{}'.format(c)] = df[c]
+        else:
+            df['std_{}'.format(c)] = (df[c] - mu) / sd
+    return(df)
+
+def merge(left, right, on=None, how='inner', ensure=True, verbose=True):
+    df = left.merge(right, on=on, how=how)
+    if len(df) != len(left) and (ensure or verbose):
+        print('{} -> {}'.format(len(left), len(df)))
+        cur = left.merge(right, on=on, how='left')
+        cols = set(right.columns).difference(set(left.columns))
+        print(cols)
+        col = list(cols)[0]
+        missing = cur[cur[col].isnull()]
+        print(missing.head())
+        if ensure:
+            assert(False)
+    return(df)

 def just_float(x):
-    x = re.sub('[^\d\.]', '', str(x))
-    return(float(x))
+    x = re.sub('[^\d\.]', '', str(x))
+    return(float(x))

 def calculate_percentages(df, cols=[]):
-    add_pct = not 'pct' in df.columns
-    if not len(cols):
-        cols = df.columns
-    if add_pct:
-        df['pct'] = 1
-    for c in cols:
-        g = df[c].value_counts().reset_index()
-        g.columns = [ c, 'N' ]
-        col = '{}_pct'.format(c)
-        g[col] = g.N / g.N.sum()
-        df = df.merge( g[[ c, col ]] )
-        if add_pct:
-            df['pct'] = df.pct * df[col]
-    return(df)
+    add_pct = not 'pct' in df.columns
+    if not len(cols):
+        cols = df.columns
+    if add_pct:
+        df['pct'] = 1
+    for c in cols:
+        g = df[c].value_counts().reset_index()
+        g.columns = [ c, 'N' ]
+        col = '{}_pct'.format(c)
+        g[col] = g.N / g.N.sum()
+        df = df.merge( g[[ c, col ]] )
+        if add_pct:
+            df['pct'] = df.pct * df[col]
+    return(df)
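The new `merge` helper is an inner join that loudly reports (and, with `ensure=True`, aborts on) row-count changes, which catches silently dropped tokens when join keys do not line up. Typical use, matching how the training code calls it:

    import pandas as pd

    left = pd.DataFrame({'collection': ['SMB'] * 2, 'token_id': ['1', '2'], 'price': [10.0, 12.0]})
    right = pd.DataFrame({'collection': ['SMB'], 'token_id': ['1'], 'nft_rank': [42]})

    df = merge(left, right, on=['collection', 'token_id'], ensure=False)  # prints '2 -> 1' and keeps going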
-exclude = [
-    # (collection, token_id, price)
-    ( 'aurory', 2239, 3500 )
-    # ( 'aurory', 856, 150 )
-    # ( 'aurory', 4715, 500 )
-    # ( 'aurory', 5561, 298 )
-    # ( 'aurory', 5900, 199 )
-    # ( 'aurory', 3323, 138 )
-]
-s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'})
-s_df[ s_df.collection == 'Levana Dragons' ].sort_values('block_timestamp', ascending=0).head()
-print(len(s_df[s_df.collection == 'Levana Dragon Eggs']))
-print(s_df.groupby('collection').token_id.count())
-s_df.collection.unique()
-s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])]
-s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]]
-s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ]
-for e in exclude:
-    s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )]
-s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ]
+def get_sales(check_exclude = True, exclude=[]):
+
+    s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'})
+    s_df['token_id'] = s_df.token_id.astype(str)
+    s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x))
+    s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])]
+    s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ]
+    s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]]
+    for e in exclude:
+        s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )]
+    s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ]
-m_df = pd.read_csv('./data/metadata.csv')
-m_df['token_id'] = m_df.token_id.astype(str)
-tmp = m_df[m_df.collection.isin(['Levana Dragon Eggs','Levana Meteors','Levana Dust'])]
-tmp['tmp'] = tmp.token_id.astype(int)
-tmp.groupby('collection').tmp.max()
-m_df.head()
-# s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(x[:10], '%Y-%m-%d %H:%M:%S') )
-s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') )
-s_df['timestamp'] = s_df.block_timestamp.astype(int)
-# del metadata['price']
-# del metadata['last_sale']
-s_df = s_df.sort_values(['collection','block_timestamp'])
-s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
-s_df = s_df.sort_values(['collection','block_timestamp'])
-s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int)
-s_df[[ 'block_timestamp','days_ago' ]].drop_duplicates(subset=['days_ago'])
+    # exclude weird data points
+    if not check_exclude:
+        exclude = pd.read_csv('./data/exclude.csv')
+        exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x))
+        exclude['token_id'] = exclude.token_id.astype(str)
+        s_df = s_df.merge(exclude, how='left')
+        s_df = s_df[s_df.exclude.isnull()]
+        del s_df['exclude']
-s_df['av_20'] = s_df.groupby('collection')['mn_20'].rolling(20).mean().reset_index(0,drop=True)
-s_df = s_df.sort_values(['collection','block_timestamp'])
-# s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).median().reset_index(0,drop=True)
-s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True)
-# s_df[ (-((s_df.price) >= (s_df.md_20 * 0.2))) & (s_df.price.notnull()) & (s_df.collection == 'Levana Dragon Eggs') ]
+
+    ###########################
+    # Calculate Floor #
+    ###########################
+    s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') )
+    s_df['timestamp'] = s_df.block_timestamp.astype(int)
+    s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int)
-s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.75) ]
-s_df = s_df.sort_values(['collection','block_timestamp'])
-s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
-s_df = s_df.sort_values(['collection','block_timestamp'])
-# s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).min().reset_index(0,drop=True)
-s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True)
-s_df.sort_values(['collection','block_timestamp'])[['price','mn_20','block_timestamp']].head(21).tail(40)
-s_df.sort_values(['collection','block_timestamp'])[['price','mn_20','block_timestamp']].head(20).sort_values('price')
-s_df['tmp'] = s_df.mn_20 / s_df.md_20
+    # lowest price in last 20 sales
+    s_df = s_df.sort_values(['collection','block_timestamp'])
+    s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
+    s_df = s_df.sort_values(['collection','block_timestamp'])
+    s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True)
-tmp = s_df[s_df.collection=='smb'][['mn_20','block_timestamp']]
-tmp['date'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] )
-tmp = tmp.groupby('date').mn_20.median().reset_index()
-tmp.to_csv('~/Downloads/tmp.csv', index=False)
+
+    # exclude sales that are far below the existing floor
+    s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ]
-s_df['tmp'] = s_df.price / s_df.mn_20
-s_df[s_df.collection == 'smb'].sort_values('block_timestamp')[['token_id','price','mn_20']]
-s_df[s_df.collection == 'smb'].sort_values('tmp').head(20)[['collection','token_id','price','mn_20','tmp']]
-s_df.groupby('collection').tmp.median()
-s_df.groupby('collection').tmp.mean()
+
+    # 10%ile of last 20 sales
+    s_df = s_df.sort_values(['collection','block_timestamp'])
+    s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
+    s_df = s_df.sort_values(['collection','block_timestamp'])
+    s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True)
+    s_df['sim'] = 0
+    s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] )
+    s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False)
+    return(s_df)
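So `mn_20` is a per-collection, point-in-time floor: `shift(1)` excludes the current sale, and the 10th percentile of the previous 20 sale prices is used rather than the strict minimum so a single bad print does not drag the floor down. A standalone sketch (window of 3 instead of 20 for the toy data):

    import pandas as pd

    sales = pd.DataFrame({'collection': 'smb', 'price': [10, 11, 9, 30, 10.5, 12]})
    prev = sales.groupby('collection').price.shift(1)   # exclude the sale itself
    floor = prev.rolling(3).quantile(0.1)               # robust rolling floor
    print(pd.concat([sales.price, prev, floor], axis=1))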
-s_df.sort_values('tmp').head()
-s_df['tmp'] = s_df.price / s_df.mn_20
-s_df[['collection','token_id','block_timestamp','price','mn_20','md_20','av_20','tmp']].to_csv('~/Downloads/tmp.csv', index=False)
-s_df.groupby('collection').tmp.median()
-s_df.groupby('collection').tmp.mean()
-s_df.sort_values('tmp', ascending=0).head()
-s_df.head(21)
-m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ]
-# m_df['feature_value'] = m_df.feature_value.apply(lambda x: x.strip() )
-# m_df.feature_value.unique()
-pred_cols = {}
-metadata = {}
-sales = {}
-collection_features = {}
-m_df[(m_df.collection == 'Galactic Punks') & (m_df.feature_name == 'pct')].sort_values('token_id')
-c = 'Levana Dragon Eggs'
-# pred_cols[c]
-EXCLUDE_COLS = {
-    # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
-    'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count']
-}
-NUMERIC_COLS = {
-    'Levana Dragon Eggs': ['rank','score','pct','collection_rank','weight','temperature']
-}
-for c in s_df.collection.unique():
-    print('Building {} model'.format(c))
-    exclude = EXCLUDE_COLS[c] if c in EXCLUDE_COLS.keys() else []
-    n_cols = NUMERIC_COLS[c] if c in NUMERIC_COLS.keys() else []
-    exclude = [ x for x in exclude if not x in n_cols ]
-    o_cols = sorted([x for x in m_df[ m_df.collection == c ].feature_name.unique() if (not x in exclude) and not (x in n_cols) ])
+def train_model(check_exclude, supplement_with_listings):
+    exclude = [
+        ( 'aurory', 2239, 3500 )
+    ]
+    s_df = get_sales(check_exclude, exclude)
+    # s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'})
+    # s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x))
+    # s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])]
+    # s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ]
+    # s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]]
+    # for e in exclude:
+    #     s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )]
+    # s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ]
-    sales[c] = s_df[ s_df.collection == c ]
-    pred_cols[c] = sorted( n_cols + o_cols )
-    collection_features[c] = [ c for c in pred_cols[c] if not c in ['score','rank','pct']+exclude ]
-    metadata[c] = m_df[ (m_df.collection == c) & (-(m_df.feature_name.isin(exclude))) ]
+    # # exclude weird data points
+    # if not check_exclude:
+    #     exclude = pd.read_csv('./data/exclude.csv')
+    #     exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x))
+    #     s_df = s_df.merge(exclude, how='left')
+    #     s_df = s_df[s_df.exclude.isnull()]
+    #     del s_df['exclude']
-    # tmp = pd.pivot_table( metadata[c], ['collection','token_id'], columns=['feature_name'], values=['feature_value'] )
-    metadata[c] = metadata[c].pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
-    metadata[c].columns = [ 'collection','token_id' ] + pred_cols[c]
-
-    features = collection_features[c]
-    cur = metadata[c]
-    # cur = cur.dropna(subset=features)
-    for f in features:
-        if type(cur[f].values[0] == str):
-            cur[f] = cur[f].apply(lambda x: re.sub("\"", "", str(x) ) )
-            cur[f] = cur[f].apply(lambda x: re.split("\(", x )[0].strip())
-    cur = cur.replace('', 'Default')
-    # if not 'pct' in cur.columns:
-    cur = calculate_percentages( cur, o_cols )
-    dummies = pd.get_dummies(cur[o_cols])
-    # feature_cols = dummies.columns
-    cur = pd.concat([ cur.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
-    metadata[c] = cur
-    # pred_cols[c] = ['rank','score','timestamp','mn_20','log_mn_20'] + list(dummies.columns)
-    # cols = [ 'collection_rank' ]
-    # cols = [ ]
-    # pred_cols[c] = [ 'rank','transform_rank','score'] + n_cols + [x for x in cols if x in m_df.feature_name.unique()] + list(dummies.columns)
-    # pred_cols[c] = [ 'rank','transform_rank','score'] + n_cols + list(dummies.columns)
-    pred_cols[c] = n_cols + list(dummies.columns)
-
-# collection_features = {
-#     'Hashmasks': [ 'character','eyecolor','item','mask','skincolor' ]
-#     , 'Galactic Punks': [ 'backgrounds','hair','species','suits','jewelry','headware','glasses' ]
-#     , 'Solana Monkey Business': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
-#     , 'Aurory': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
-#     # , 'Thugbirdz': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
-# }
-
-coefsdf = pd.DataFrame()
-salesdf = pd.DataFrame()
-attributes = pd.DataFrame()
-pred_price = pd.DataFrame()
-feature_values = pd.DataFrame()
-collections = sorted(metadata.keys())
-collection = 'Galactic Punks'
-tokens = pd.read_csv('./data/tokens.csv')
-collection = 'Levana Dragon Eggs'
-# for collection in s_df.collection.unique():
-for collection in ['Levana Dragon Eggs']:
-    # collection = 'LunaBulls'
-    # collection = 'smb'
-    # collection = 'aurory'
-    # collection = 'meerkatmillionaires'
-    print('Working on collection {}'.format(collection))
-    p_metadata = metadata[collection]
-    if 'attribute_count' in p_metadata.columns:
-        p_metadata['attribute_count'] = p_metadata.attribute_count.astype(float).astype(int)
-
-    p_sales = sales[collection]
-    # specify the predictive features
-    p_pred_cols = pred_cols[collection]
-    if collection == 'Levana Dragon Eggs':
-        p_pred_cols += [ 'transformed_collection_rank' ]
-    p_features = collection_features[collection]
-    p_sales['token_id'] = p_sales.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
-    p_metadata['token_id'] = p_metadata.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
-    for c in [ 'rank','score' ]:
-        p_metadata[c] = p_metadata[c].astype(float)
-    # p_sales['contract_address'] = p_sales.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
-    # p_metadata['contract_address'] = p_metadata.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
-    p_sales['contract_address'] = ''
-    p_metadata['contract_address'] = ''
-
-    # remove 1 columns for each group (since they are colinear)
-    # exclude = []
-    # for f in p_features:
-    #     e = [ c for c in p_pred_cols if c[:len(f)] == f ][-1]
-    #     exclude.append(e)
-
-    df = p_sales.merge(p_metadata, on=['token_id','contract_address'])
-    df = df[df.mn_20.notnull()]
-    target_col = 'adj_price'
-    df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 )
-    # df['mn_20'] = df.apply(lambda x: min(x[target_col], x['mn_20']), 1 )
-    # tmp = df[['block_timestamp','mn_20']].copy()
-    # tmp['tmp'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] )
-    # tmp = tmp.groupby('tmp').mn_20.median().reset_index()
-    # tmp.sort_values('tmp').to_csv('~/Downloads/tmp.csv', index=False)
-    # df['timestamp'] = df.block_timestamp.astype(int)
-    df = df[df[target_col].notnull()]
-    df = df.reset_index(drop=True)
-    df['transform_rank'] = df['rank'].apply(lambda x: 1.0 / (x**2) )
-    df['rel_price_0'] = df[target_col] - df.mn_20
-    df['rel_price_1'] = df[target_col] / df.mn_20
-    df = df[df.mn_20 > 0]
-    df['log_mn_20'] = np.log(df.mn_20)
-    print('Training on {} sales'.format(len(df)))
-    # df['price_median'] = df.groupby('token_id').price.median()
-
-    # standardize columns to mean 0 sd 1
-    len(p_pred_cols)
-    n_cols = NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []
-    for c in n_cols:
-        df[c] = df[c].apply(lambda x: just_float(x) )
-    if collection == 'Levana Dragon Eggs':
-        df['transformed_collection_rank'] = df.collection_rank.apply(lambda x: (1.0/ x)**2 )
-    df = standardize_df(df, p_pred_cols)
-    std_pred_cols_0 = [ 'std_{}'.format(c) for c in p_pred_cols ]
-    # p_pred_cols = [ c for c in p_pred_cols if not c in exclude ]
-    std_pred_cols = [ 'std_{}'.format(c) for c in p_pred_cols ]
-    df['log_price'] = df[target_col].apply(lambda x: np.log(x) )
-    # df.sort_values('block_timestamp').head(10)[['price','tx_id']]
-    # df.sort_values('block_timestamp').head(10)[['price','tx_id']].tx_id.values
-    # df = df[df.price >= 1]
-
-    #########################
-    # Run the Model #
-    #########################
-    len(df)
-    len(df.dropna(subset=std_pred_cols))
-    tmp = df[std_pred_cols].count().reset_index()
-    tmp.columns = ['a','b']
-    tmp.sort_values('b').head(20)
-    rem = list(tmp[tmp.b==0].a.values)
-    std_pred_cols = [ c for c in std_pred_cols if not c in rem ]
-    mn = df.timestamp.min()
-    mx = df.timestamp.max()
-    df['weight'] = df.timestamp.apply(lambda x: 2.5 ** ((x - mn) / (mx - mn)) )
-    X = df[std_pred_cols].values
-    mu = df.log_price.mean()
-    sd = df.log_price.std()
-    df['std_log_price'] = (df.log_price - mu) / sd
-    # y = df.std_log_price.values
-    # y = df[target_col].values
-    # y = df.rel_price_1.values
-    y_0 = df.rel_price_0.values
-    y_1 = df.rel_price_1.values
-    # y_log = df.log_price.values
-
-    clf_lin = Lasso() if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)])
-    clf_lin.fit(X, y_0, df.weight.values)
-    coefs = []
-    for a, b in zip(std_pred_cols, clf_lin.coef_):
-        coefs += [[a,b]]
-    coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
-    coefs.to_csv('~/Downloads/tmp.csv', index=False)
-    df['pred_lin'] = clf_lin.predict(X)
-    df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20
-    df['err_lin'] = abs(((df.pred_lin - df[target_col]) / df[target_col]) )
-    # df['err_lin'] = abs(df.pred_lin - df.price )
-    # df[[ 'price','pred_lin','err_lin','mn_20' ]].sort_values('err_lin').tail(50)
-    df.head()
-    clf_log = Lasso() if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)])
-    clf_log.fit(X, y_1, df.weight.values)
-    coefs = []
-    for a, b in zip(std_pred_cols, clf_log.coef_):
-        coefs += [[a,b]]
-    coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
-    coefs.to_csv('~/Downloads/tmp.csv', index=False)
-    df['pred_log'] = clf_log.predict(X)
-    df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
-    df['err_log'] = abs(((df.pred_log - df[target_col]) / df[target_col]) )
-    df[[ target_col,'pred_log','err_log','mn_20' ]].sort_values('err_log').tail(50)
-    df['err'] = df.err_lin * df.err_log
-
-    df[[ target_col,'pred_log','err_log','err_lin','err','mn_20' ]].sort_values('err').tail(50)
-    df['collection'] = collection
-
-    # df['pred_lin'] = clf_lin.predict(X)
-    # df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20
-    # df['pred_log'] = np.exp(clf_log.predict(X))
-    # df['pred_log'] = clf_log.predict(X)
-    # df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
-    clf = LinearRegression(fit_intercept=False)
-    clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.weight.values )
-    print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) ))
-    l = df.sort_values('block_timestamp', ascending=0).mn_20.values[0]
-    tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], l]], columns=['collection','lin_coef','log_coef','floor_price'])
-    if clf.coef_[0] < 0:
-        print('Only using log')
-        df['pred'] = df.pred_log
-        tmp['lin_coef'] = 0
-        tmp['log_coef'] = 1
-    elif clf.coef_[1] < 0:
-        print('Only using lin')
-        df['pred'] = df.pred_lin
-        tmp['lin_coef'] = 1
-        tmp['log_coef'] = 0
-    else:
-        print('Only using BOTH!')
-        df['pred'] = clf.predict( df[['pred_lin','pred_log']].values )
-    coefsdf = coefsdf.append(tmp)
-    df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) )
-    df[df.block_timestamp>='2021-10-01'].sort_values('err', ascending=0).head(10)[[ 'pred',target_col,'token_id','block_timestamp','err','mn_20' ]]
-    # df[df.block_timestamp>='2021-10-01'].err.mean()
-    df.merge(tokens[['collection','token_id','clean_token_id']]).sort_values('err', ascending=0).head(10)[[ 'pred',target_col,'clean_token_id','rank','block_timestamp','err','mn_20','tx_id' ]]
-    df.sort_values('price', ascending=0).head(20)[[ 'price','pred',target_col,'token_id','block_timestamp','err','mn_20','tx_id' ]]
-    df.sort_values('price', ascending=0).tail(40)[[ 'price','pred',target_col,'token_id','block_timestamp','err','mn_20','tx_id' ]]
-    df.sort_values('price', ascending=0).head(20).tx_id.values
-
-    # print(np.mean(y))
-    # print(np.mean(clf.predict(X)))
-
-    # # run neural net
-    # model = tf.keras.models.Sequential([
-    #     tf.keras.layers.Dense(9, activation='relu')
-    #     , tf.keras.layers.Dropout(.2)
-    #     , tf.keras.layers.Dense(3, activation='relu')
-    #     , tf.keras.layers.Dropout(.2)
-    #     , tf.keras.layers.Dense(1, activation='linear')
-    # ])
-    # model.compile(loss='mae', optimizer=tf.keras.optimizers.SGD(learning_rate=0.0025))
-    # model.fit(X, y, epochs=500, validation_split=0.3)
-
-    # df['pred'] = np.exp( (sd * model.predict(df[std_pred_cols].values)) + mu)
-    # df['pred'] =
model.predict(df[std_pred_cols].values) - # ratio = df.price.mean() / df.pred.mean() - # print("Manually increasing predictions by {}%".format(round((ratio-1) * 100, 1))) - - # checking errors - # df['pred'] = df.pred * ratio - df['err'] = df[target_col] - df.pred - df['q'] = df.pred.rank() * 10 / len(df) - df['q'] = df.q.apply(lambda x: int(round(x)) ) - df['pct_err'] = (df[target_col] / df.pred) - 1 - pe_mu = df.pct_err.mean() - pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) ].pct_err.std() - pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) & (df.days_ago<=50) ].pct_err.std() - df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) ) - df['pred_sd'] = df.pred * pe_sd - print(df.groupby('q')[['err','pred',target_col]].mean()) - print(df[df.weight >= df.weight.median()].groupby('q')[['err','pred',target_col]].mean()) - # df.err.mean() - # df[df.weight >= 3.5].err.mean() - df['collection'] = collection - print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2))) - salesdf = salesdf.append( df[[ 'collection','contract_address','token_id','block_timestamp','price','pred','mn_20','rank','score' ]].sort_values('block_timestamp', ascending=0) ) - - # create the attributes dataframe - for f in p_features: - if f and '{}_pct'.format(f) in p_metadata.columns: - cur = p_metadata[[ 'token_id', f, '{}_pct'.format(f) ]] - cur.columns = [ 'token_id', 'value','rarity' ] - cur['feature'] = f - cur['collection'] = collection - attributes = attributes.append(cur) - - # create predictions for each NFT in the collection - test = p_metadata.copy() - for c in n_cols: - test[c] = test[c].apply(lambda x: just_float(x) ) - if collection in [ 'Levana Dragon Eggs' ]: - test['transformed_collection_rank'] = test.collection_rank.apply(lambda x: (1.0 / x) ** 2 ) - tail = df.sort_values('timestamp').tail(1) - for c in [ 'std_timestamp','mn_20','log_mn_20' ]: - if c in tail.columns: - test[c] = tail[c].values[0] - test = standardize_df(test, [c for c in p_pred_cols if not c in ['timestamp'] ], df, True) - # test['pred_lin'] = clf_lin.predict( test[std_pred_cols].values ) - # test['pred_log'] = np.exp(clf_log.predict( test[std_pred_cols].values )) - - test['pred_lin'] = clf_lin.predict(test[std_pred_cols].values) - test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l) - # test['pred_lin'] = df.pred_lin + df.mn_20 - # df['pred_log'] = np.exp(clf_log.predict(X)) - test['pred_log'] = clf_log.predict(test[std_pred_cols].values) - test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l - - test['pred'] = clf.predict( test[[ 'pred_lin','pred_log' ]].values ) - # test['pred'] = np.exp( (sd * model.predict(test[std_pred_cols].values)) + mu) * ratio - test['pred_price'] = test.pred#.apply(lambda x: x*(1+pe_mu) ) - if not CHECK_EXCLUDE: - test['pred_price'] = test.pred.apply(lambda x: (x*0.985) ) - test['pred_sd'] = test.pred * pe_sd - test['rk'] = test.pred.rank(ascending=0, method='first') - test['collection'] = collection - pred_price = pred_price.append( test[[ 'collection', 'contract_address','token_id','rank','rk','pred_price','pred_sd' ] + p_features].rename(columns={'rank':'hri_rank'}).sort_values('pred_price') ) - # print(test[[ 'contract_address','token_id','pred_price','pred_sd' ]].sort_values('pred_price')) + ######################### + # Load Metadata # + ######################### + m_df = pd.read_csv('./data/metadata.csv') + m_df['token_id'] = m_df.token_id.astype(str) + m_df['collection'] = m_df.collection.apply(lambda x: clean_name(x)) + 
m_df['token_id'] = m_df.token_id.astype(str) + # remove ones that are not actually metadata + m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ] + m_df['feature_value'] = m_df.feature_value.apply(lambda x: re.split("\(", re.sub("\"", "", x))[0] if type(x)==str else x ) + m_df[(m_df.feature_name=='rank') & (m_df.collection == 'Levana Dragon Eggs')] + sorted(m_df[ (m_df.collection == 'Solana Monkey Business') ].feature_name.unique()) - ############################## - # Feature Importance # - ############################## - coefs = [] - for a, b, c in zip(p_pred_cols, clf_lin.coef_, clf_log.coef_): - coefs += [[ collection, a, b, c ]] - coefs = pd.DataFrame(coefs, columns=['collection','col','lin_coef','log_coef']) - # coefs['feature'] = coefs.col.apply(lambda x: ' '.join(re.split('_', x)[:-1]).title() ) - # coefs['feature'] = coefs.col.apply(lambda x: '_'.join(re.split('_', x)[:-1]) ) - # coefs['value'] = coefs.col.apply(lambda x: re.split('_', x)[-1] ) - # mn = coefs.groupby('feature')[[ 'lin_coef','log_coef' ]].min().reset_index() - # mn.columns = [ 'feature','mn_lin_coef','mn_log_coef' ] - # coefs = coefs.merge(mn) - # coefs['lin_coef'] = coefs.lin_coef - coefs.mn_lin_coef - # coefs['log_coef'] = coefs.log_coef - coefs.mn_log_coef - # coefs - # g = attributes[ attributes.collection == collection ][[ 'feature','value','rarity' ]].drop_duplicates() - # g['value'] = g.value.astype(str) - # len(coefs) - # g = coefs.merge(g, how='left') - # g[g.rarity.isnull()] - # len(g) - # coefs = coefs.merge( m_df[ m_df.collection == collection ][[ 'feature_name','' ]] ) - # coefs.sort_values('lin_coef').tail(20) - - # TODO: pick the most common one and have that be the baseline - most_common = attributes[(attributes.collection == collection)].sort_values('rarity', ascending=0).groupby('feature').head(1) - most_common['col'] = most_common.apply(lambda x: 'std_{}_{}'.format( re.sub(' ', '_', x['feature'].lower()), x['value'] ), 1 ) - mc = most_common.col.unique() - data = [] - for c0 in std_pred_cols_0: - if c0 in ['std_rank','std_score','std_pct','std_timestamp','std_mn_20','std_log_mn_20']: - continue - f = '_'.join(re.split('_', c0)[1:-1]) - v = re.split('_', c0)[-1] - rarity = p_metadata[p_metadata['{}_{}'.format(f, v)]==1]['{}_pct'.format(f)].values[0] - # avg = p_metadata['{}_pct'.format(f)].mean() - # avg_pct = df.pct.mean() - # pct_std = ((avg_pct * r / avg) - avg_pct) / df.pct.std() - r = df[df['{}_{}'.format(f, v)]==1].std_rank.mean() - s = df[df['{}_{}'.format(f, v)]==1].std_score.mean() - if r == r and s == s: - datum = [ c0, rarity ] - for c1 in std_pred_cols: - datum.append(1 if c1 == c0 else r if c1 == 'std_rank' else s if c1 == 'std_score' else 1 if c1 in mc else 0 ) - data += [ datum ] - - importance = pd.DataFrame(data, columns=['feature','rarity']+std_pred_cols) - sorted(importance.feature.unique()) - importance[importance.feature == 'std_fur_/_skin_Leopard'] - if 'std_timestamp' in df.columns: - importance['std_timestamp'] = df.std_timestamp.max() - # importance['pred_lin'] = clf_lin.predict( importance[std_pred_cols].values ) - # importance['pred_log'] = np.exp(clf_log.predict( importance[std_pred_cols].values )) - - importance['pred_lin'] = clf_lin.predict(importance[std_pred_cols].values) - importance['pred_lin'] = importance.pred_lin.apply(lambda x: max(0, x) + l) - # importance['pred_lin'] = importance.pred_lin.apply(lambda x: x + l) - importance['pred_log'] = clf_log.predict(importance[std_pred_cols].values) - importance['pred_log'] = 
importance.pred_log.apply(lambda x: max(1, x)) * l - # importance['pred_log'] = importance.pred_log.apply(lambda x: x) * l - - importance['pred'] = clf.predict( importance[[ 'pred_lin','pred_log' ]].values ) - # importance['pred'] = np.exp( (sd * model.predict(importance[std_pred_cols].values)) + mu) - importance = importance.sort_values('pred', ascending=0) - importance.head()[['feature','pred']] - importance[importance.feature == 'std_fur_/_skin_Leopard'] - importance['feature'] = importance.feature.apply(lambda x: re.sub('std_', '', x)) - importance['value'] = importance.feature.apply(lambda x: re.split('_', x)[-1]) - importance['feature'] = importance.feature.apply(lambda x: '_'.join(re.split('_', x)[:-1])) - mn = importance.groupby('feature').pred.min().reset_index().rename(columns={'pred':'baseline'}) - importance = importance.merge(mn) - importance['pred_vs_baseline'] = importance.pred - importance.baseline - importance['pct_vs_baseline'] = (importance.pred / importance.baseline) - 1 - importance[(importance.feature == 'fur_/_skin')].sort_values('pred')[['value','rarity','pred','pred_lin','pred_log','std_rank','std_score']].sort_values('rarity') - importance['collection'] = collection - importance.sort_values('pct_vs_baseline')[['feature','value','pct_vs_baseline']] - tmp = importance[std_pred_cols].mean().reset_index() - tmp.columns = [ 'a', 'b' ] - tmp = tmp.sort_values('b') - feature_values = feature_values.append(importance[['collection','feature','value','pred','pred_vs_baseline','pct_vs_baseline','rarity']]) - -attributes['feature'] = attributes.feature.apply(lambda x: re.sub('_', ' ', x).title() ) -feature_values['feature'] = feature_values.feature.apply(lambda x: re.sub('_', ' ', x).title() ) - -pred_price = pred_price[[ 'collection', 'contract_address', 'token_id', 'hri_rank', 'rk', 'pred_price', 'pred_sd' ]] + ##################################### + # Exclude Special LunaBulls # + ##################################### + tokens = pd.read_csv('./data/tokens.csv') + tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x)) + tokens.token_id.unique() + lunabullsrem = tokens[tokens.clean_token_id>=10000].token_id.unique() + m_df = m_df[ -((m_df.collection == 'LunaBulls') & (m_df.token_id.isin(lunabullsrem))) ] + s_df = s_df[ -((s_df.collection == 'LunaBulls') & (s_df.token_id.isin(lunabullsrem))) ] + s_df = s_df.drop_duplicates(subset=['collection','token_id','price']) -coefsdf.to_csv('./data/coefsdf.csv', index=False) -salesdf.to_csv('./data/model_sales.csv', index=False) -pred_price.to_csv('./data/pred_price.csv', index=False) -attributes.to_csv('./data/attributes.csv', index=False) -feature_values.to_csv('./data/feature_values.csv', index=False) + ########################### + # Calculate Floor # + ########################### + # s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') ) + # s_df['timestamp'] = s_df.block_timestamp.astype(int) + # s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) -pred_price = pd.read_csv('./data/pred_price.csv') -tokens = pd.read_csv('./data/tokens.csv') -rem = tokens[tokens.clean_token_id>=10000].token_id.unique() -l0 = len(pred_price) -pred_price = pred_price[ -((pred_price.collection == 'LunaBulls') & (pred_price.token_id.isin(rem))) ] -l1 = len(pred_price) -pred_price.to_csv('./data/pred_price.csv', index=False) + # # lowest price in last 20 sales + # s_df = 
s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = s_df.groupby('collection').price.shift(1) + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) -# listings = pd.read_csv('./data/listings.csv') -# listings['token_id'] = listings.token_id.astype(int) + # # exclude sales that are far below the existing floor + # s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ] -# tmp = salesdf.merge(attributes[ (attributes.collection == 'thugbirdz') & (attributes.feature == 'Position In Gang') & (attributes.value == 'Underboss') ]) -# tmp = pred_price.merge(attributes[ (attributes.collection == 'thugbirdz') & (attributes.feature == 'Position In Gang') & (attributes.value == 'Underboss') ]) -# tmp['token_id'] = tmp.token_id.astype(int) -# tmp = tmp.merge(listings[['collection','token_id','price']]) -# tmp.sort_values('pred_price', ascending=0) + # # 10%ile of last 20 sales + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = s_df.groupby('collection').price.shift(1) + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True) + # s_df['sim'] = 0 + # s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] ) + # s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False) -if CHECK_EXCLUDE: - salesdf['rat'] = salesdf.price / salesdf.pred - salesdf['dff'] = salesdf.price - salesdf.pred - salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int) - salesdf['rat'] = salesdf.pred / salesdf.price - salesdf['dff'] = salesdf.pred - salesdf.price - salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int) - salesdf['exclude'] = (salesdf.exclude_1 + salesdf.exclude_2).apply(lambda x: int(x>0)) - print(salesdf.exclude_1.mean()) - print(salesdf.exclude_2.mean()) - print(salesdf.exclude.mean()) - salesdf[salesdf.token_id == '2239'][['collection','price','exclude']] - salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False) + if supplement_with_listings: + pred_price = pd.read_csv('./data/pred_price.csv') + pred_price['collection'] = pred_price.collection.apply(lambda x: clean_name(x)) + listings = pd.read_csv('./data/listings.csv') + listings['collection'] = listings.collection.apply(lambda x: clean_name(x)) + listings['block_timestamp'] = s_df.block_timestamp.max() + floor = s_df.sort_values('timestamp').groupby('collection').tail(1)[['collection','mn_20']] + tmp = merge(listings, pred_price, ensure=False) + tmp = tmp[tmp.price < tmp.pred_price] + tmp['timestamp'] = tmp.block_timestamp.astype(int) + tmp['days_ago'] = tmp.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) + tmp = merge(tmp, floor) -attributes[ (attributes.collection == 'thugbirdz') & (attributes.token_id == '1869') ] -feature_values[ (feature_values.collection == 'thugbirdz') & (feature_values.feature == 'position_in_gang') ] -sorted(feature_values[ (feature_values.collection == 'thugbirdz') ].feature.unique()) + n = round(len(s_df) / 5000) + n = 
max(1, min(2, n))
+ # n = 1
+ for _ in range(n):
+ s_df = s_df.append(tmp[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]])
+ # tmp_1 = tmp[tmp.price <= 0.8 * tmp.pred_price]
+ # s_df = s_df.append(tmp_1[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]])
+ # tmp_2 = tmp[tmp.price <= 0.6 * tmp.pred_price]
+ # tmp_2 = s_df.append(tmp_2[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]])
-pred_price[pred_price.collection == 'peskypenguinclub'].head()
\ No newline at end of file
+
+ ############################
+ # Train Collection Models #
+ ############################
+ coefsdf = pd.DataFrame()
+ salesdf = pd.DataFrame()
+ attributes = pd.DataFrame()
+ pred_price = pd.DataFrame()
+ feature_values = pd.DataFrame()
+ # non-binary in model: collection_rank, temperature, weight
+ # non-binary in model; exclude from rarity: pct, rank, score
+ # exclude from model: lucky_number, shower
+ # exclude from model and rarity %: meteor_id, attribute_count, cracking_date
+ # ALL_NUMERIC_COLS = ['rank','score','pct']
+ ALL_NUMERIC_COLS = ['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2']
+ MODEL_EXCLUDE_COLS = {
+ # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
+ 'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count','rarity_score_rank','rarity_score','weight']
+ , 'Solana Monkey Business': ['Clothes_Diamond']
+ }
+ MODEL_INCLUDE_COLS = {
+ # 'Solana Monkey Business': ['std_Hat_Strawhat','std_Hat_Space Warrior Hair','std_Clothes_Diamond','std_Eyes_Solana Vipers','std_Eyes_Vipers','std_Hat_Sombrero','std_Eyes_3D Glasses','std_Hat_Cowboy Hat','std_Eyes_Laser Eyes','std_matching_cop','std_matching_white','std_matching_black']
+ 'Solana Monkey Business': ['std_Hat_Space Warrior Hair','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_Sombrero','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Eyes_Laser Eyes','std_Type_Solana']
+ }
+ RARITY_EXCLUDE_COLS = {
+ # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
+ 'Levana Dragon Eggs': ['meteor_id','attribute_count','collection_rank','transformed_collection_rank','rarity_score','rarity_score_rank','collection_rank_group']
+ }
+ NUMERIC_COLS = {
+ 'Levana Dragon Eggs': ['collection_rank','temperature','transformed_collection_rank']
+ }
+ ATT_EXCLUDE_COLS = {
+ 'Levana Dragon Eggs': ['attribute_count','transformed_collection_rank','collection_rank_group']
+ }
+ collection = 'Solana Monkey Business'
+ # for collection in s_df.collection.unique():
+ for collection in [ 'Solana Monkey Business' ]:
+ print('Working on collection {}'.format(collection))
+ sales = s_df[ s_df.collection == collection ]
+ metadata = m_df[ m_df.collection == collection ]
+ metadata.groupby(['feature_name','feature_value']).token_id.count().reset_index().to_csv('~/Downloads/tmp.csv', index=False)
+ metadata[metadata.token_id == '1']
+ metadata['feature_name'] = metadata.feature_name.apply(lambda x: x.strip() )
+ metadata[metadata.token_id == '1']
+ metadata[metadata.feature_name == 'rank']
+ metadata.feature_name.unique()
+ metadata[(metadata.token_id=='1') & (metadata.collection == 'Solana Monkey Business')]
+
+ # categorize columns
+ all_names = sorted(metadata.feature_name.unique())
+ model_exclude = MODEL_EXCLUDE_COLS[collection] if collection in MODEL_EXCLUDE_COLS.keys() else []
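+ # Features are split into two groups before modeling: numeric rank-style columns
+ # (nft_rank plus the adj_nft_rank_* variants, and any per-collection NUMERIC_COLS)
+ # stay numeric, while every other attribute is treated as categorical and
+ # one-hot encoded below.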
+ num_features = sorted((NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []) + ALL_NUMERIC_COLS)
+ num_features = [ x for x in num_features if x in metadata.feature_name.unique() ]
+ num_metadata = metadata[metadata.feature_name.isin(num_features)]
+ num_metadata[num_metadata.feature_name == 'nft_rank']
+ cat_features = sorted([ x for x in all_names if not x in (model_exclude + num_features) ])
+ cat_metadata = metadata[metadata.feature_name.isin(cat_features)]
+
+ # pivot the numeric features to one column per feature
+ num_metadata = num_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
+ num_metadata.columns = [ 'collection','token_id' ] + num_features
+
+ # create dummies for binary variables
+ cat_metadata = cat_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
+ cat_metadata.columns = [ 'collection','token_id' ] + cat_features
+ cat_metadata = calculate_percentages( cat_metadata, cat_features )
+ dummies = pd.get_dummies(cat_metadata[cat_features])
+ dummies.head(1).to_csv('~/Downloads/tmp2.csv', index=False)
+ if collection == 'Solana Monkey Business':
+ # hand-built interaction features for matching outfit combos
+ dummies['matching_cop'] = ((dummies['Clothes_Cop Vest'] == 1) & (dummies['Hat_Cop Hat'] == 1)).astype(int)
+ dummies['matching_white'] = ((dummies['Clothes_Beige Smoking'] == 1) & ((dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2']) == 1)).astype(int)
+ dummies['matching_black'] = ((dummies['Clothes_Black Smoking'] == 1) & ((dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_Black Top Hat']) == 1)).astype(int)
+ dummies['matching_top'] = ((dummies['matching_black'] == 1) | (dummies['matching_white']== 1)).astype(int)
+ # dummies['matching_green'] = ((dummies['Clothes_Green Smoking'] == 1) & ((dummies['Hat_Green Top Hat']) == 1)).astype(int)
+ # dummies['naked_1_att'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Clothes_None'] == 1)).astype(int)
+ # dummies['naked_1_att_hat'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Hat_None'] == 0)).astype(int)
+ dummies['fedora'] = (dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2'] >= 1 ).astype(int)
+ dummies['backwards_cap'] = (dummies['Hat_Black Backwards Cap'] + dummies['Hat_Blue Backwards Cap'] + dummies['Hat_Green Backwards Cap'] + dummies['Hat_Orange Backwards Cap'] + dummies['Hat_Purple Backwards Cap'] + dummies['Hat_Solana Backwards Cap'] >= 1 ).astype(int)
+ del dummies['matching_white']
+ del dummies['matching_black']
+ cat_metadata = pd.concat([ cat_metadata.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
+ del cat_metadata['pct']
+
+ for c in model_exclude:
+ if c in dummies.columns:
+ del dummies[c]
+ pred_cols = num_features + list(dummies.columns)
+
+ # create training df
+ df = merge(sales, num_metadata, ['collection','token_id'], ensure=False)
+ df = merge(df, cat_metadata, ['collection','token_id'])
+ df[df.adj_nft_rank_0 == 'None']
+ df[df.adj_nft_rank_0 == 'None'][['collection','token_id','nft_rank','adj_nft_rank_0']]
+ df.adj_nft_rank_0.unique()
+ for c in num_features:
+ df[c].unique()
+ df[df.nft_rank == 'None']
+ df[df[c] == 'None'][[ 'nft_rank' ]]
+ df[c] = df[c].apply(lambda x: just_float(x))
+ df.sort_values('price', ascending=0)[['price']].head(20)
+ # df.groupby(['rarity','weight']).price.mean()
+
+ # create target cols
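+ # The models train on an adjusted price rather than the raw sale price:
+ # adj_price clips each sale up to roughly 70% of the trailing floor (mn_20,
+ # computed in get_sales), so far-below-floor outliers cannot drag the fit down.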
+ target_col = 'adj_price'
+ df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 )
+ df = df[df[target_col].notnull()]
+ df['log_price'] = df[target_col].apply(lambda x: np.log(x) )
+ df['rel_price_0'] = df[target_col] - df.mn_20
+ df['rel_price_1'] = df[target_col] / df.mn_20
+ df = df[df.mn_20 > 0]
+ df['log_mn_20'] = np.log(df.mn_20)
+ print('Training on {} sales'.format(len(df)))
+ df = standardize_df(df, pred_cols)
+
+ std_pred_cols_0 = [ 'std_{}'.format(c) for c in pred_cols ]
+ std_pred_cols = [ 'std_{}'.format(c) for c in pred_cols ]
+
+ #########################
+ # Run the Model #
+ #########################
+ # drop standardized columns that are entirely null
+ tmp = df[std_pred_cols].count().reset_index()
+ tmp.columns = ['a','b']
+ tmp.sort_values('b').head(20)
+ rem = list(tmp[tmp.b==0].a.values)
+ std_pred_cols = [ c for c in std_pred_cols if not c in rem ]
+ if collection == 'Levana Dragon Eggs':
+ std_pred_cols = [ 'std_essence_Dark','std_collection_rank_group_0','std_rarity_Legendary','std_rarity_Rare','std_rarity_Ancient','std_collection_rank','std_transformed_collection_rank' ]
+ # weight recent sales more heavily
+ mn = df.timestamp.min()
+ mx = df.timestamp.max()
+ df['wt'] = df.timestamp.apply(lambda x: 3.0 ** ((x - mn) / (mx - mn)) )
+ if collection == 'Levana Dragon Eggs':
+ df['wt'] = 1
+ # df['wt'] = df.price.apply(lambda x: 1.0 / (x ** 0.9) )
+ # df.sort_values('price', ascending=0)[['price','wt']].head(20)
+ # std_pred_cols = [ 'std_Hat_Crown','std_adj_nft_rank_0','std_Hat_None','std_Eyes_None','std_Clothes_None','std_Attribute Count_4','std_Mouth_None','std_adj_nft_rank_1','std_Type_Dark','std_Ears_None','std_Background_Light purple','std_Hat_Black Fedora 2','std_Hat_White Fedora 2','std_Attribute Count_0','std_Type_Skeleton','std_Attribute Count_2','std_Attribute Count_1','std_Hat_Protagonist Black Hat','std_Clothes_Sailor Vest','std_Mouth_Pipe','std_Hat_Protagonist White Hat','std_Clothes_Pirate Vest','std_Hat_Roman Helmet','std_Type_Solana','std_Clothes_Beige Smoking','std_Hat_Military Helmet','std_Hat_White Fedora 1','std_naked_1_att','std_Type_Zombie','std_Clothes_Roman Armor','std_Eyes_3D Glasses','std_Clothes_Orange Kimono','std_Hat_Green Punk Hair','std_Hat_Sombrero','std_Clothes_Military Vest','std_Hat_Space Warrior Hair','std_Hat_Blue Punk Hair','std_Clothes_Orange Jacket','std_Ears_Earing Silver','std_Eyes_Laser Eyes','std_Eyes_Vipers','std_Type_Alien','std_Type_Red','std_Hat_Admiral Hat' ]
+ # cur_std_pred_cols = [ 'std_adj_nft_rank_0','std_Hat_Crown','std_adj_nft_rank_1','std_Type_Skeleton','std_Type_Alien','std_Clothes_None','std_Eyes_Vipers','std_Hat_Space Warrior Hair','std_Type_Zombie','std_Clothes_Pirate Vest','std_Clothes_Orange Kimono','std_Eyes_Laser Eyes','std_Type_Solana','std_Hat_Ninja Bandana','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Attribute Count_0','std_Attribute Count_1','std_Attribute Count_2','std_Attribute Count_3','std_Attribute Count_5','std_Hat_Strawhat','std_Hat_Admiral Hat','std_matching_top','std_Hat_Sombrero','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_None' ]
+ cur_std_pred_cols = deepcopy(std_pred_cols)
+ g = df[std_pred_cols].sum().reset_index()
+ g.columns = [ 'col','cnt' ]
+ g = g.sort_values('cnt')
+ g.head(20)
+ if collection == 'Solana Monkey Business':
+ # manually correct the rank for token 903
+ df.loc[ df.token_id == '903', 'nft_rank' ] = 18
+ df[df.token_id=='903']
+ df[df.token_id==903]
+ df = df.reset_index(drop=True)
+ X = df[cur_std_pred_cols].values
+ y_0 = df.rel_price_0.values
+ y_1 = df.rel_price_1.values
+ folds = ku.get_folds(len(X), 5)
+ for target_col in [ 'rel_price_0', 'rel_price_1' ]:
+ print('target_col = {}'.format(target_col))
+ y_val = df[target_col].values
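+ # Model-selection sweep: for each candidate target (rel_price_0 = premium over
+ # the floor in SOL, rel_price_1 = multiple of the floor), fit lasso, ridge,
+ # random forest, and gradient-boosted regressors. ku.get_bst_params is taken
+ # here to be the local helper that cross-validates each model over the
+ # precomputed folds and writes its best predictions back onto df.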
+ cur_err = 0
+ for model in ['las','ridge','rfr','gbr']:
+ df, bst_p, bst_r = ku.get_bst_params( model, df, X, y_val, target_col, 'y_pred_{}_{}'.format(model, target_col[-1]), verbose = True, wt_col='wt' )
+ # df['tmp'] = df.collection_rank.apply(lambda x: int((8888 - x)/1000) )
+ # g = df.groupby('tmp').rel_price_0.mean().reset_index()
+ # g['g'] = g.tmp.apply(lambda x: (((1.42**(x**1.42)) - 1) / 20) + 0.13 )
+ # g['g'] = g.tmp.apply(lambda x: 2**x )
+ # g
+
+ # run the linear model
+ # clf_lin = Lasso(alpha=1.0) if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)])
+
+ # clf_lin = Ridge(alpha=1000)
+ # clf_lin = Ridge(alpha=100)
+ # clf_lin.fit(X, y_0, df.wt.values)
+ # clf_las = Lasso(alpha=1.5)
+ # clf_las.fit(X, y_0, df.wt.values)
+ # clf_rfr = RandomForestRegressor()
+ # clf_rfr.fit(X, y_0)
+ # clf_rfr.feature_importances_
+ # imp = []
+ # for a, b, c, d in zip(cur_std_pred_cols, clf_rfr.feature_importances_, clf_lin.coef_, clf_las.coef_):
+ # imp += [[a, b, abs(c), abs(d)]]
+ # imp = pd.DataFrame(imp, columns=['col','imp','lin','las']).sort_values('imp', ascending=0)
+ # imp['imp_rk'] = imp.imp.rank(ascending=0)
+ # imp['lin_rk'] = imp.lin.rank(ascending=0)
+ # imp['las_rk'] = imp.las.rank(ascending=0)
+ # imp['include'] = 0
+ # imp.to_csv('~/Downloads/coef.csv', index=False)
+ # imp.head(50).tail(20)
+ # imp.head(40).tail(10)
+ # imp.head(50).tail(10)
+ # sanity checks on coefficient signs:
+ # nft_rank should be negative
+ # adj_nft_rank_0 should be positive
+ # adj_nft_rank_1 should be positive
+ # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)])
+ # clf_lin = Ridge(alpha=30, fit_intercept=True)
+ clf_lin = Lasso(alpha=.225)
+ def get_coefs(cols, coef):
+ coefs = []
+ for a, b in zip(cols, coef):
+ coefs += [[a,b]]
+ coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
+ # coefs.to_csv('~/Downloads/{}_lin_coefs.csv'.format(collection), index=False)
+ coefs['tmp'] = coefs.col.apply(lambda x: 'nft_rank' in x )
+ # coefs['mult'] = coefs.col.apply(lambda x: -1 if x == 'std_nft_rank' else 1 )
+ coefs['mult'] = coefs.apply(lambda x: -1 if x['col'] == 'std_nft_rank' else 1 if x['coef'] >= 0 else -1 , 1 )
+ coefs['val'] = coefs.mult * coefs.coef
+ coefs = coefs.sort_values('val', ascending=0)
+ return(coefs)
+
+ # backward elimination: refit and drop the lowest-value column each pass until
+ # no column has a negative adjusted value and at most 140 columns remain
+ mn = -1
+ print('Starting with {} cols'.format(len(cur_std_pred_cols)))
+ while mn < 0 or len(cur_std_pred_cols) > 140:
+ X = df[cur_std_pred_cols].values
+ clf_lin.fit(X, y_0, df.wt.values)
+ coefs = get_coefs(cur_std_pred_cols, clf_lin.coef_)
+ tmp = coefs[coefs.tmp == True]
+ mn = min(coefs.val) if len(coefs) else 0
+ cur_std_pred_cols.remove(coefs.col.values[-1])
+ coefs.to_csv('~/Downloads/{}_lin_coefs.csv'.format(collection), index=False)
+ len(coefs[coefs.coef !=0])
+ # print(coefs[coefs.coef !=0])
+ # print(len(coefs[coefs.coef !=0]))
+ INCLUDE_COLS = MODEL_INCLUDE_COLS[collection] if collection in MODEL_INCLUDE_COLS.keys() else []
+
+ # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)])
+
+ cur_std_pred_cols = list(coefs[coefs.coef !=0].col.unique())
+ for c in INCLUDE_COLS:
+ if not c in cur_std_pred_cols:
+ cur_std_pred_cols.append(c)
+ lin_std_pred_cols = cur_std_pred_cols
+ X = df[cur_std_pred_cols].values
+ # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)])
+ # clf_lin = Lasso(alpha=0.1)
+ clf_lin = Lasso(alpha=.1)
+ clf_lin.fit(X, y_0, df.wt.values)
+ coefs = get_coefs(cur_std_pred_cols, clf_lin.coef_)
+ print(coefs[coefs.coef !=0])
+ 
print(len(coefs[coefs.coef !=0])) + print(coefs[coefs.col.isin(INCLUDE_COLS)]) + coefs[coefs.coef !=0].to_csv('./data/coefs/{}_lin_coefs.csv'.format(collection), index=False) + df[df['std_Attribute Count_0']!=0] + df['std_Attribute Count_0'].unique() + coefs[coefs.col.isin(INCLUDE_COLS)] + df['pred'] = clf_lin.predict(X) + df['err'] = df.pred - df.rel_price_0 + df[df['std_Hat_Space Warrior Hair'] == 1][['pred',target_col]].mean() + df[df['std_Hat_Space Warrior Hair'] == 1].err.median() + tmp = [] + for c in std_pred_cols: + if len(df[df[c] == 1]): + mu = round(df[df[c] == 1].err.mean()) + md = round(df[df[c] == 1].err.median()) + n = len(df[df[c] == 1]) + tmp += [[ c, int(c in cur_std_pred_cols ), n, mu, md ]] + # print('{}: {}, {}, {}'.format(c, mu, md, n)) + tmp = pd.DataFrame(tmp, columns=['c','i','n','mu','md']).sort_values('mu') + tmp.to_csv('~/Downloads/tmp4.csv', index=False) + tmp[tmp.i == 0].head(8) + tmp[tmp.i == 0].tail(8) + 'std_Hat_Crown','std_Attribute Count_0','std_Hat_Space Warrior Hair','std_Eyes_Laser Eyes','std_Type_Solana','' + df[df['std_Hat_Space Warrior Hair'] == 1].err.mean() + df[df['std_Hat_Strawhat'] == 1][['pred','rel_price_0']].mean() + + df['pred_lin'] = clf_lin.predict(X) + df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20 + df['err_lin'] = abs(((df.pred_lin - df[target_col]) / df[target_col]) ) + # df[df.genus_Titan==1][['rarity']] + # df[(df.rarity=='Legendary') | (df.genus=='Titan')][['genus','rarity']] + + # run the log model + # clf_log = Lasso(1.0) if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)]) + clf_log = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) + clf_log = Ridge(alpha=30) + clf_log = Lasso(0.003) + # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) + + mn = -1 + cur_std_pred_cols = deepcopy(std_pred_cols) + while mn < 0 or len(cur_std_pred_cols) > 140: + X = df[cur_std_pred_cols].values + clf_log.fit(X, y_1, df.wt.values) + coefs = get_coefs(cur_std_pred_cols, clf_log.coef_) + tmp = coefs[coefs.tmp == True] + mn = min(tmp.coef) if len(tmp) else 0 + if mn < 0: + cur_std_pred_cols.remove(tmp.col.values[-1]) + else: + cur_std_pred_cols.remove(coefs.col.values[-1]) + coefs = get_coefs(cur_std_pred_cols, clf_log.coef_) + coefs[coefs.coef !=0].to_csv('./data/coefs/{}_log_coefs.csv'.format(collection), index=False) + # print(coefs[coefs.coef !=0]) + len(coefs[coefs.coef !=0]) + # cur_std_pred_cols = list(coefs[coefs.coef !=0].col.unique()) + for c in INCLUDE_COLS: + if not c in cur_std_pred_cols: + cur_std_pred_cols.append(c) + log_std_pred_cols = cur_std_pred_cols + X = df[cur_std_pred_cols].values + clf_log = Lasso(0.001) + clf_log.fit(X, y_1, df.wt.values) + coefs = get_coefs(cur_std_pred_cols, clf_log.coef_) + print(coefs[coefs.coef !=0]) + print(len(coefs[coefs.coef !=0])) + print(coefs[coefs.col.isin(INCLUDE_COLS)]) + # clf_log.fit(X, y_1, df.wt.values) + # if collection == 'Levana Dragon Eggs': + # coefs = [] + # for a, b in zip(std_pred_cols, clf_lin.coef_): + # coefs += [[a,b]] + # coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0) + # coefs.to_csv('~/Downloads/levana_log_coefs.csv', index=False) + df['pred_log'] = clf_log.predict(X) + df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20 + df['err_log'] = abs(((df.pred_log - df[target_col]) / df[target_col]) ) + df[[ target_col,'pred_log','err_log','mn_20' ]].sort_values('err_log').tail(50) + df['err'] = df.err_lin * df.err_log + + + # combine the models + clf = 
LinearRegression(fit_intercept=False) + clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values ) + df[['pred_lin','pred_log',target_col]].mean() + print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) )) + l = df.sort_values('block_timestamp', ascending=0).mn_20.values[0] + tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], l]], columns=['collection','lin_coef','log_coef','floor_price']) + if clf.coef_[0] < 0: + print('Only using log') + df['pred'] = df.pred_log + tmp['lin_coef'] = 0 + tmp['log_coef'] = 1 + elif clf.coef_[1] < 0: + print('Only using lin') + df['pred'] = df.pred_lin + tmp['lin_coef'] = 1 + tmp['log_coef'] = 0 + else: + print('Only using BOTH!') + df['pred'] = clf.predict( df[['pred_lin','pred_log']].values ) + coefsdf = coefsdf.append(tmp) + df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) ) + + # print out some summary stats + df['err'] = df[target_col] - df.pred + df['q'] = (df.pred.rank() ** 1.5 * .2) / len(df) + df['q'] = df.q.apply(lambda x: int(round(x)) ) + df['pct_err'] = (df[target_col] / df.pred) - 1 + pe_mu = df.pct_err.mean() + pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) ].pct_err.std() + pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) & (df.days_ago<=50) ].pct_err.std() + df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) ) + df['pred_sd'] = df.pred * pe_sd + # print(df.groupby('q')[['err','pred',target_col]].mean()) + print(df[df.wt >= df.wt.median()].groupby('q')[['err','pred',target_col]].mean()) + print(df.groupby('q')[['err','pred',target_col]].mean()) + # df.err.mean() + # df[df.weight >= 3.5].err.mean() + df[df.pred < 200].err.mean() + df['collection'] = collection + print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2))) + salesdf = salesdf.append( df.merge(s_df[s_df.sim == 0][['collection','token_id','block_timestamp','price']] )[[ 'collection','token_id','block_timestamp','price','pred','mn_20','nft_rank' ]].sort_values('block_timestamp', ascending=0) ) + + + ############################################################ + # Create Predictions for Each NFT in The Collection # + ############################################################ + test = merge(num_metadata, cat_metadata, ['collection','token_id']) + for c in num_features: + test[c] = test[c].apply(lambda x: just_float(x) ) + tail = df.sort_values('timestamp').tail(1) + test.loc[ test.token_id == '903', 'nft_rank' ] = 18 + test[test.token_id=='903'] + for c in [ 'std_timestamp','mn_20','log_mn_20' ]: + if c in tail.columns: + test[c] = tail[c].values[0] + test = standardize_df(test, pred_cols, df) + + test['pred_lin'] = clf_lin.predict(test[lin_std_pred_cols].values) + test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l) + test['pred_log'] = clf_log.predict(test[log_std_pred_cols].values) + test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l + + test['pred_price'] = clf.predict( test[[ 'pred_lin','pred_log' ]].values ) + if not check_exclude: + test['pred_price'] = test.pred_price.apply(lambda x: (x*0.985) ) + test['pred_sd'] = test.pred_price * pe_sd + test = test.sort_values(['collection','token_id']) + test['rk'] = test.pred_price.rank(ascending=0, method='first') + test['collection'] = collection + pred_price = pred_price.append( test[[ 'collection','token_id','nft_rank','rk','pred_price','pred_sd' ]].sort_values('pred_price') ) + + cols = metadata.feature_name.unique() + cols = [ x for x in cols if not x in 
(ATT_EXCLUDE_COLS[collection] if collection in ATT_EXCLUDE_COLS.keys() else []) + ALL_NUMERIC_COLS ] + exclude = RARITY_EXCLUDE_COLS[collection] if collection in RARITY_EXCLUDE_COLS.keys() else [] + for c in cols: + cur = metadata[metadata.feature_name == c][['collection','token_id','feature_name','feature_value']] + l = len(cur.token_id.unique()) + if c in exclude: + cur['rarity'] = None + else: + g = cur.groupby('feature_value').token_id.count().reset_index() + g['rarity'] = g.token_id / l + cur = merge(cur, g[['feature_value','rarity']]) + attributes = attributes.append(cur) + + attributes['feature_name'] = attributes.feature_name.apply(lambda x: re.sub('_', ' ', x).title() ) + sorted(attributes['feature_name'].unique()) + if len(feature_values): + feature_values['feature_name'] = feature_values.feature_name.apply(lambda x: re.sub('_', ' ', x).title() ) + + coefsdf.to_csv('./data/coefsdf.csv', index=False) + salesdf.to_csv('./data/model_sales.csv', index=False) + old = pd.read_csv('./data/pred_price copy.csv') + old['token_id'] = old.token_id.astype(str) + old = pred_price.merge(old, on=['collection','token_id']) + old['ratio'] = old.pred_price_x / old.pred_price_y + old = old.sort_values('ratio') + old.columns = [ 'collection', 'token_id', 'nft_rank', 'rk_new', 'pred_price_new', 'pred_sd_x', 'rank', 'rk_old', 'pred_price_old', 'pred_sd_y', 'clean_token_id', 'ratio' ] + m = m_df[(m_df.collection.isin(pred_price.collection.unique())) & (-(m_df.feature_name.isin(['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'])))] + m_p = m.pivot(['collection','token_id'], ['feature_name'], ['feature_value']).reset_index() + m_p.columns = [ 'collection','token_id' ] + sorted(m.feature_name.unique()) + m_p.head() + old = old.merge(m_p, on=['collection','token_id']) + old = old[[ 'token_id', 'nft_rank', 'rk_old', 'rk_new', 'pred_price_old', 'pred_price_new', 'ratio' ] + [c for c in m_p.columns if not c in ['token_id','collection']]] + old.to_csv('~/Downloads/tmp1.csv', index=False) + pred_price.head() + old[old.token_id == '4857'] + old.head() + old.tail() + + pred_price.to_csv('./data/pred_price.csv', index=False) + attributes.to_csv('./data/attributes.csv', index=False) + attributes[attributes.rarity.isnull()] + feature_values.to_csv('./data/feature_values.csv', index=False) + + # metadata = pd.read_csv('./data/metadata.csv') + # metadata['collection'] = metadata.collection.apply(lambda x: clean_name(x)) + # metadata['token_id'] = metadata.token_id.astype(str) + # metadata.head() + # nft_rank = pred_price[[ 'collection','token_id','nft_rank' ]].rename(columns={'nft_rank':'feature_value'}) + # nft_rank['feature_name'] = 'nft_rank' + # metadata = metadata[metadata.feature_name != 'nft_rank'] + # nft_rank = merge(nft_rank, metadata[['collection','chain']].fillna('Solana').drop_duplicates()) + # metadata = metadata.append(nft_rank) + # metadata.to_csv('./data/metadata.csv', index=False) + + + feature_values.to_csv('./data/feature_values.csv', index=False) + + if check_exclude: + salesdf['rat'] = salesdf.price / salesdf.pred + salesdf['dff'] = salesdf.price - salesdf.pred + salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int) + salesdf['rat'] = salesdf.pred / salesdf.price + salesdf['dff'] = salesdf.pred - salesdf.price + salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | 
((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int) + salesdf['exclude'] = (salesdf.exclude_1 + salesdf.exclude_2).apply(lambda x: int(x>0)) + print(salesdf.exclude_1.mean()) + print(salesdf.exclude_2.mean()) + print(salesdf.exclude.mean()) + salesdf[salesdf.token_id == '2239'][['collection','price','exclude']] + salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False) + +train_model(True, False) +train_model(False, True) \ No newline at end of file diff --git a/update.py b/update.py index 0bddaaa9..d6d0a976 100644 --- a/update.py +++ b/update.py @@ -69,20 +69,21 @@ sales.price.max() def add_model_sales(): sales = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'}) + print(sales.groupby('collection').token_id.count()) sales.token_id.unique() sales.groupby('collection').token_id.count() sales[sales.collection == 'Galactic Punks'] del sales['tx_id'] - old = pd.read_csv('./data/pred_price copy.csv').rename(columns={'rank':'nft_rank'}) old = pd.read_csv('./data/pred_price.csv').rename(columns={'rank':'nft_rank'}) + old = pd.read_csv('./data/pred_price copy.csv').rename(columns={'rank':'nft_rank'}) old.groupby('collection').token_id.count() sales['token_id'] = sales.token_id.astype(int).astype(str) old['token_id'] = old.token_id.astype(str) sales = sales.merge( old[['collection','token_id','nft_rank']] ) - sales.groupby('collection').token_id.count() sales.head() sales['block_timestamp'] = sales.block_timestamp.apply(lambda x: str(x)[:19] ) sales['price'] = sales.price.apply(lambda x: round(x, 2)) + print(sales.groupby('collection').token_id.count()) sales.to_csv('./data/model_sales.csv', index=False) @@ -105,13 +106,6 @@ def update_token_ids(): df = pd.read_csv('./data/{}.csv'.format(c)) df['token_id'] = df.token_id.apply(lambda x: str(int(float(x))) ) df['tmp'] = df.token_id.apply(lambda x: (str(x)[:5])) - df[(df.collection == 'Galactic Punks') & (df.price == 99)] - df[(df.collection == 'Galactic Punks') & (df.price == 99) & (df.tx_id == 'B57DB0555DED1D9593765EB9EF09796068268B91CF211CC5BF445AA0006205EC')] - df[(df.collection == 'Galactic Punks') & (df.price == 99) & (df.tx_id == 'B57DB0555DED1D9593765EB9EF09796068268B91CF211CC5BF445AA0006205EC')].token_id.values - tokens[(tokens.collection == 'Galactic Punks') ].token_id.values - tokens[(tokens.collection == 'Galactic Punks') & (tokens.token_id == '25984997114855597728010029317878710272') ].token_id.values - tokens[(tokens.token_id == '25984997114855597728010029317878710272') ].token_id.values - tokens[(tokens.token_id == '"25984997114855597728010029317878710272"') ].token_id.values df['tmp'] = df.token_id.apply(lambda x: x[:10] ) tokens['tmp'] = tokens.token_id.apply(lambda x: x[:10] ) len(tokens) @@ -152,5 +146,6 @@ def update_token_ids(): df[df.collection == 'Galactic Punks'] print(df.groupby('collection').token_id.count() ) df.to_csv('./data/{}.csv'.format(c), index=False) -update_token_ids() +update_token_ids() +add_model_sales() \ No newline at end of file diff --git a/viz/global.R b/viz/global.R index 1779ae2c..97c086c9 100644 --- a/viz/global.R +++ b/viz/global.R @@ -11,6 +11,10 @@ library(shinyjs) require(dplyr) library(htmlwidgets) library(reactable) +# library(promises) +# library(future) +# plan(multisession) + plotly.style <- list( plot_bgcolor = "rgba(0, 0, 0, 0)", diff --git a/viz/server.R b/viz/server.R index 31453380..b3cea0ed 100644 --- a/viz/server.R +++ b/viz/server.R @@ -1,6 +1,8 @@ 
server <- function(input, output, session) {
 load('data.Rdata')
+ metadata <- unique(attributes[, list(collection, feature_name, feature_value)])
+
 SD_MULT = 3
 SD_SCALE = 1.95
@@ -56,6 +58,343 @@ server <- function(input, output, session) {
 )
 })
+ output$maxnftrankinput2 <- renderUI({
+ textInput(
+ inputId = 'maxnftrank2'
+ , label = NULL
+ , width = "100%"
+ )
+ })
+ output$minnftrankinput2 <- renderUI({
+ textInput(
+ inputId = 'minnftrank2'
+ , label = NULL
+ , width = "100%"
+ )
+ })
+
+ output$maxrarityrankinput2 <- renderUI({
+ textInput(
+ inputId = 'maxrarityrank2'
+ , label = NULL
+ , width = "100%"
+ )
+ })
+ output$minrarityrankinput2 <- renderUI({
+ textInput(
+ inputId = 'minrarityrank2'
+ , label = NULL
+ , width = "100%"
+ )
+ })
+
+ # the 18 attribute filter dropdowns are identical apart from their index,
+ # so they are generated in one loop instead of 18 copies of the same block
+ lapply(1:18, function(i) {
+ output[[paste0('filter', i, 'select')]] <- renderUI({
+ selected <- getCollection()
+ name <- getMetadataColumns()
+ if(length(name) < i) {
+ return(NULL)
+ }
+ name <- name[i]
+ m <- metadata[ collection == eval(selected) & feature_name == eval(name) ]
+ choices <- c('Any', sort(m$feature_value))
+ selectInput(
+ inputId = paste0('filter', i)
+ , label = NULL
+ , selected = 'Any'
+ , choices = choices
+ , width = "100%"
+ )
+ })
+ })
+
 output$collectionselect <- renderUI({
 choices <- sort(unique(pred_price$collection))
 selectInput(
@@ -128,7 +467,40 @@
 cur_0 <- pred_price[collection == eval(selected) ]
 cur_1 <- cur_0[ token_id == eval(as.numeric(input$tokenid)) ]
 if (nrow(cur_1)) {
- t <- paste0("Market Rank #", format(cur_1$rk[1], big.mark=",")," / ",format(nrow(cur_0), big.mark=","))
+ t <- paste0("Deal Score Rank #", format(cur_1$rk[1], big.mark=",")," / ",format(nrow(cur_0), big.mark=","))
+ }
+ }
+ paste0(t)
+ })
+
+ output$salesAverage <- renderText({
+ data <- getSalesData()
+ t <- ''
+ if (nrow(data)) {
+ p <- format(round(mean(head(data$price, 100)), 1), big.mark=',')
+ f <- format(round(mean(head(data$vs_floor, 100)), 1), big.mark=',')
+ print('p')
+ print(p)
+ print(f)
+ t <- paste0(p, ' $SOL (+',f,' vs the floor)')
+ }
+ paste0(t)
+ })
+
+ output$rarityrank <- renderText({
+ id <- getTokenId()
+ selected <- getCollection()
+ chain <- getChain()
+ t <- ""
+ if( length(id) == 0 | length(selected) == 0 ) {
+ return(t)
+ }
+ if (!is.na(id) & !is.na(selected)) {
+ cur_0 <- pred_price[collection == eval(selected) ]
+ cur_1 <- cur_0[ token_id == eval(as.numeric(input$tokenid)) ]
+ if (nrow(cur_1)) {
+ a <- ifelse( chain == 'Solana', 'HowRare', 'NotFoundTerra' )
+ t <- paste0(a, " Rank #", format(cur_1$nft_rank[1], big.mark=",")," / ",format(nrow(cur_0), big.mark=","))
 }
 }
 paste0(t)
@@ -213,7 +585,7 @@
 return(head(attributes, 0))
 }
 cur <- attributes[ token_id == eval(as.numeric(id)) & collection == eval(selected) ]
- # cur <- merge( cur, feature_values[collection == eval(selected), list(feature_name, feature_value, pred_vs_baseline, pct_vs_baseline) ], all.x=TRUE )
+ cur <- merge( cur, feature_values[collection == eval(selected), list(feature_name, feature_value, pct_vs_baseline) ], all.x=TRUE )
 cur <- cur[order(rarity)]
 # floor <- getFloors()[2]
 # log_coef <- coefsdf[ collection == eval(selected) ]$log_coef[1]
@@ -228,9 +600,9 @@
 # mult <- ratio / s
 # cur[, pct_vs_baseline := pct_vs_baseline * eval(mult) ]
 # }
- cur[, vs_baseline := 0 ]
- cur[, pred_vs_baseline := 0 ]
- cur[, vs_baseline := 0 ]
+ # cur[, vs_baseline := 0 ]
+ # cur[, pred_vs_baseline := 0 ]
+ # cur[, vs_baseline := 0 ]
 # cur[, vs_baseline := round((pred_vs_baseline * eval(lin_coef)) + (pct_vs_baseline * eval(floor) * eval(log_coef) ), 1) ]
 # cur[, pred_vs_baseline := round(pred_vs_baseline, 1) ]
 # cur[, 
@@ -246,7 +618,7 @@ server <- function(input, output, session) {
     # reactable(data[, list( feature, value, rarity, vs_baseline, pred_vs_baseline, pct_vs_baseline )],
     # data <- data[, list( feature, value, rarity, pct_vs_baseline )]
-    data <- data[, list( feature_name, feature_value, rarity )]
+    data <- data[, list( feature_name, feature_value, rarity, pct_vs_baseline )]
     reactable(data,
       defaultColDef = colDef(
         headerStyle = list(background = "#10151A")
@@ -256,16 +628,16 @@ server <- function(input, output, session) {
       outlined = FALSE,
       columns = list(
         feature_name = colDef(name = "Attribute", align = "left"),
-        feature_value = colDef(name = "Value", align = "left"),
-        rarity = colDef(name = "Rarity", align = "left")
-        # pct_vs_baseline = colDef(
-        #   name="Value", header=with_tooltip("Value", "The estimated price impact of this feature vs the floor")
-        #   , html = TRUE
-        #   , align = "left"
-        #   , cell = function(x) {
-        #     htmltools::tags$span(paste0('+', format(round(x*1000)/10, digits=4, decimal.mark=".", big.mark=","), '%'))
-        #   }
-        # )
+        feature_value = colDef(name = "Name", align = "left"),
+        rarity = colDef(name = "Rarity", align = "left"),
+        pct_vs_baseline = colDef(
+          name="General Price Impact", header=with_tooltip("General Price Impact", "The estimated price impact of this feature vs the floor")
+          , html = TRUE
+          , align = "left"
+          , cell = function(x) {
+            htmltools::tags$span(paste0('+', format(round(x*1000)/10, digits=4, decimal.mark=".", big.mark=","), '%'))
+          }
+        )
       )
     )
   })
@@ -328,41 +700,221 @@ server <- function(input, output, session) {
     )
   })
-  output$salestable <- renderReactable({
+  getFilteredSalesData <- function(data, selected, val, i) {
+    if(length(val) > 0) {
+      if(val != 'Any') {
+        att <- getMetadataColumns()
+        if(length(att) >= i) {
+          att <- att[i]
+          include <- attributes[collection == eval(selected) & feature_name == eval(att) & feature_value == eval(val), list(token_id) ]
+          data <- merge(data, include)
+        }
+      }
+    }
+    return(data)
+  }
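+  # e.g. (hypothetical values) getFilteredSalesData(data, 'smb', 'Blue', 1)
+  # inner-joins on token_id and keeps only sales whose first (alphabetical)
+  # metadata feature equals 'Blue'; val == 'Any' or an empty selection
+  # returns data unchanged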
+
+  getSalesDataFn <- function(selected, sales, tokens, pred_price, attributes) {
+    data <- sales[ collection == eval(selected)]
+    m <- pred_price[collection == eval(selected), list(token_id, rk)]
+    data <- merge(data, m, all.x=TRUE)
+
+    data <- merge(data, tokens[collection == eval(selected), list(collection, token_id, image_url)], all.x=T )
+    data <- data[, list( token_id, image_url, block_timestamp, price, pred, mn_20, nft_rank, rk )]
+
+    data <- data[order(-block_timestamp)]
+
+    data[, vs_floor := pmax(0, price - mn_20) ]
+
+    m <- dcast(attributes[collection == eval(selected), list(token_id, feature_name, feature_value)], token_id ~ feature_name, value.var='feature_value')
+    names <- colnames(m)
+    data <- merge(data, m, all.x=TRUE)
+
+    data <- data[order(-block_timestamp)]
+    data[, mn_20 := pmin(mn_20, price) ]
+    data[, mn_20_label := paste0(format(round(mn_20, 1), scientific = FALSE, digits=2, decimal.mark=".", big.mark=","))]
+    data[, price_label := paste0(format(price, scientific = FALSE, digits=2, decimal.mark=".", big.mark=","))]
+    data[, block_timestamp := substr(block_timestamp, 1, 10) ]
+    return(data)
+  }
+
+  getSalesData <- reactive({
     selected <- getCollection()
     if( length(selected) == 0 ) {
       return(NULL)
     }
     # data <- sales[ collection == eval(selected) , list( token_id, block_timestamp, price, pred, mn_20 )]
-    data <- sales[ collection == eval(selected) , list( token_id, block_timestamp, price )]
-    data[, price := paste0(format(price, scientific = FALSE, digits=2, decimal.mark=".", big.mark=","))]
-    # data[, pred := paste0(format(round(pred, 1), scientific = FALSE, digits=2, decimal.mark=".", big.mark=","))]
-
+    data <- sales[ collection == eval(selected)]
     m <- pred_price[collection == eval(selected), list(token_id, rk)]
     data <- merge(data, m, all.x=TRUE)
-    m <- dcast(attributes[collection == eval(selected), list(token_id, feature_name, clean_name)], token_id ~ feature_name, value.var='clean_name')
-    data <- merge(data, m, all.x=TRUE)
+    if(input$maxnftrank2 != '') {
+      r <- as.numeric(input$maxnftrank2)
+      data <- data[ rk <= eval(r) ]
+    }
+    if(input$minnftrank2 != '') {
+      data <- data[ rk >= eval(as.numeric(input$minnftrank2)) ]
+    }
+    if(input$maxrarityrank2 != '') {
+      r <- as.numeric(input$maxrarityrank2)
+      data <- data[ nft_rank <= eval(r) ]
+    }
+    if(input$minrarityrank2 != '') {
+      data <- data[ nft_rank >= eval(as.numeric(input$minrarityrank2)) ]
+    }
+    # apply the attribute dropdowns one at a time; 'Any' or a missing
+    # input (NULL) is a no-op
+    for (i in 1:20) {
+      data <- getFilteredSalesData(data, selected, input[[paste0('filter', i)]], i)
+    }
-    data <- data[order(-block_timestamp)]
+    data <- merge(data, tokens[collection == eval(selected), list(collection, token_id, image_url)], all.x=T )
+    data <- data[, list( token_id, image_url, block_timestamp, price, pred, mn_20, nft_rank, rk )]
-    reactable(data,
-      defaultColDef = colDef(
-        headerStyle = list(background = "#10151A")
-      ),
-      filterable = TRUE,
-      borderless = TRUE,
-      outlined = FALSE,
-      searchable = FALSE,
-      columns = list(
-        token_id = colDef(name = "Token ID", align = "left"),
-        block_timestamp = colDef(name = "Sale Date", align = "left"),
-        price = colDef(name = "Price", align = "left"),
-        # pred = colDef(name = "Fair Market Price", align = "left"),
-        rk = colDef(name = "DS Rank", align = "left")
-        # mn_20 = colDef(name = "Floor Price", align = "left")
-      )
-    )
+    data <- data[order(-block_timestamp)]
+
+    data[, vs_floor := pmax(0, price - mn_20) ]
+
+    m <- dcast(attributes[collection == eval(selected), list(token_id, feature_name, feature_value)], token_id ~ feature_name, value.var='feature_value')
+    names <- colnames(m)
+    data <- merge(data, m, all.x=TRUE)
+
+    data <- data[order(-block_timestamp)]
+    data[, mn_20 := pmin(mn_20, price) ]
+    data[, mn_20_label := paste0(format(round(mn_20, 1), scientific = FALSE, digits=2, decimal.mark=".", big.mark=","))]
+    data[, price_label := paste0(format(price, scientific = FALSE, digits=2, decimal.mark=".", big.mark=","))]
+    data[, block_timestamp := substr(block_timestamp, 1, 10) ]
+    return(data)
+  })
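+  # getSalesDataFn above mirrors this reactive minus the input$ filters,
+  # presumably so it can be handed to future() for async rendering (see the
+  # commented-out call in salestable below)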
+
+  getMetadataColumns <- reactive({
+    selected <- getCollection()
+    m <- unique(metadata[ collection == eval(selected), list(feature_name) ])
+    names <- sort(m$feature_name)
+    return(names)
+  })
+
+  getFilterText <- function(i) {
+    t <- ''
+    m <- getMetadataColumns()
+    if(length(m) >= i) {
+      t <- m[i]
+    }
+    return(t)
+  }
+
+  # filter labels 1-20: the i-th metadata feature name, or '' when the
+  # collection has fewer than i features
+  lapply(1:20, function(i) {
+    output[[paste0('filter', i)]] <- renderText({
+      paste0(getFilterText(i))
+    })
+  })
+
+  output$salestable <- renderReactable({
+    selected <- getCollection()
+    if( length(selected) == 0 ) {
+      return(NULL)
+    }
+    # data <- future(getSalesData()) %...>% head() %>% print()
+    data <- getSalesData()
+    # data <- future(getSalesDataFn(selected, sales, tokens, pred_price, attributes)) %...>%
+    reactable(data,
+      defaultColDef = colDef(
+        headerStyle = list(background = "#10151A")
+      ),
+      # filterable = TRUE,
+      borderless = TRUE,
+      outlined = FALSE,
+      searchable = FALSE,
+      columns = list(
+        token_id = colDef(name = "Token ID", align = "left"),
+        image_url = colDef(name = "Token", align = "left", cell = function(value, index) {
+          if(index <= 100) {
+            htmltools::tags$img(src=value)
+          } else {
+            return(NULL)
+          }
+        }),
+        block_timestamp = colDef(name = "Sale Date", align = "left"),
+        price_label = colDef(name = "Price", align = "left"),
+        pred = colDef(name = "Fair Market Price", align = "left"),
+        rk = colDef(name = "Deal Score Rank", align = "left"),
+        nft_rank = colDef(name = "Rarity Rank", align = "left"),
+        mn_20_label = colDef(name = "Floor Price", align = "left")
+      )
+    )
   })
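+  # Both this table and the listings table cap inline images at the first
+  # 100 rows; the cell function runs for every row when the widget is built,
+  # so skipping <img> tags past 100 presumably avoids fetching thousands of
+  # thumbnails at once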

   getPriceDistributionData <- reactive({
@@ -538,6 +1090,9 @@ server <- function(input, output, session) {
     df <- merge(df, tokens[collection == eval(selected), list(collection, token_id, image_url)] )
     tuple <- getConvertedPrice()
     floors <- getFloors()
+    print('getListingData')
+    print(tuple)
+    print(floors)
     df[, pred_price_0 := pred_price ]
     df[, pred_price := pred_price + eval(tuple[1]) + ( eval(tuple[2]) * pred_price / eval(floors[1]) ) ]
@@ -550,7 +1105,7 @@ server <- function(input, output, session) {
     df[, pred_price := paste0(format(round(pred_price, 1), digits=3, decimal.mark=".", big.mark=",")) ]
     df <- df[, list(image_url, token_id, price, pred_price, deal_score, rk)]
-    m <- dcast(attributes[collection == eval(selected)], collection + token_id ~ feature_name, value.var='clean_name')
+    m <- dcast(attributes[collection == eval(selected)], collection + token_id ~ feature_name, value.var='feature_value')
     df <- merge(df, m, all.x=TRUE)
     df[, collection := NULL]
     df <- df[order(-deal_score)]
@@ -564,6 +1119,8 @@ server <- function(input, output, session) {
     if( nrow(df) == 0 ) {
       return(NULL)
     }
+    print('head(df)')
+    print(head(df))
     df <- df[ deal_score >= 10 ]
     df[, hover_text := paste0('#',token_id,'<br>Listing Price: ',price,'<br>Fair Market Price: ',pred_price,'<br>Deal Score: ',deal_score) ]
     f <- min(df[price > 0]$price)
@@ -706,7 +1263,10 @@ server <- function(input, output, session) {
     if (name == 'solana-monkey-business') name <- 'smb'
     if (name == 'degen-ape-academy') name <- 'degenapes'
     href <- paste0('https://howrare.is/',name,'/',id)
-    url <- span("*Rarity from ", a("howrare.is", href=href)," used in the model")
+    cur_0 <- pred_price[collection == eval(selected) ]
+    cur_1 <- cur_0[ token_id == eval(as.numeric(input$tokenid)) ]
+
+    url <- span("*Rarity from ", a("howrare.is", href=href), paste0(" (rank #", format(cur_1$nft_rank[1], big.mark = ','), ") used in the model"))
     HTML(paste(url))
   })
@@ -755,14 +1315,18 @@ server <- function(input, output, session) {
       filterable = TRUE,
       outlined = FALSE,
       columns = list(
-        image_url = colDef(name = "Token", align = "left", cell = function(value) {
-          htmltools::tags$img(src=value)
+        image_url = colDef(name = "Token", align = "left", cell = function(value, index) {
+          if(index <= 100) {
+            htmltools::tags$img(src=value)
+          } else {
+            return(NULL)
+          }
         }),
        token_id = colDef(name = "Token ID", align = "left"),
        price = colDef(name = "Listed Price", align = "left"),
        pred_price = colDef(name = "Fair Market Price", align = "left"),
        deal_score = colDef(name = "Deal Score", align = "left"),
-        rk = colDef(name = "Market Rank", align = "left")
+        rk = colDef(name = "Deal Score Rank", align = "left")
       ),
       searchable = FALSE
     )
diff --git a/viz/ui.R b/viz/ui.R
index 642f83d9..73588524 100644
--- a/viz/ui.R
+++ b/viz/ui.R
@@ -78,13 +78,16 @@ fluidPage(
     div(class = "subtitle", textOutput("tokenrank", inline=TRUE), icon(class="padding-left-5", id="rank-tooltip", "info-circle") )
     , bsTooltip(id = "rank-tooltip", title = "Dynamic value rank based on the estimated fair market price modeled from historical sales. Model and rank will update periodically as we get more sales data.", placement = "bottom", trigger = "hover")
   )
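+  # second subtitle line: the token's rarity rank (HowRare rank for Solana
+  # collections), rendered by output$rarityrank in server.R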
+  , div(
+    div(class = "subtitle", textOutput("rarityrank", inline=TRUE))
+  )
   , div(class = "link", uiOutput('randomearthurl'))
 )
 , fluidRow(
-  column(6
+  column(5
     , div(class = "token-img", uiOutput("tokenimg"))
   )
-  , column(6, div(
+  , column(7, div(
     class = "table"
     , reactableOutput("attributestable")
     , bsTooltip(id = "value-tooltip", title = "Represents the dollar impact this feature has on the price vs the floor", placement = "bottom", trigger = "hover")
@@ -110,12 +113,11 @@ fluidPage(
   , div(class='description', 'Click a dot to select the token')
 )
 , fluidRow(
-  column(4
+  class = 'filters'
+  , column(4
     , div(
       class = "inputtitle"
       , "Max Price"
-      # , icon(id="floor-price-tooltip", "info-circle")
-      # , bsTooltip(id = "floor-price-tooltip", title = "Update this number to the current floor price of the collection, which will update the rest of the numbers on this page", placement = "bottom", trigger = "hover")
     )
     , fluidRow(uiOutput("maxpriceinput"))
   )
@@ -135,6 +137,126 @@ fluidPage(
   class="grey8row"
   , h2("Historical Sales", icon(class="padding-left-10", id="historical-sales-tooltip", "info-circle"))
   , bsTooltip(id = "historical-sales-tooltip", title = "This app is still in beta - sales data may be incomplete or delayed", placement = "bottom", trigger = "hover")
+  , fluidRow(
+    class = 'filters'
+    , column(3
+      , div(
+        class = "inputtitle"
+        , "Min Deal Score Rank"
+      )
+      , fluidRow(uiOutput("minnftrankinput2"))
+    )
+    , column(3
+      , div(
+        class = "inputtitle"
+        , "Max Deal Score Rank"
+      )
+      , fluidRow(uiOutput("maxnftrankinput2"))
+    )
+    , column(3
+      , div(
+        class = "inputtitle"
+        , "Min Rarity Rank"
+      )
+      , fluidRow(uiOutput("minrarityrankinput2"))
+    )
+    , column(3
+      , div(
+        class = "inputtitle"
+        , "Max Rarity Rank"
+      )
+      , fluidRow(uiOutput("maxrarityrankinput2"))
+    )
+    # one label + dropdown pair per metadata feature (filter1-filter12),
+    # built in a loop; Shiny flattens the list into the row's children
+    , lapply(1:12, function(i) {
+      column(3
+        , div(
+          class = "inputtitle"
+          , textOutput(paste0('filter', i), inline=TRUE)
+        )
+        , fluidRow(uiOutput(paste0('filter', i, 'select')))
+      )
+    })
+  )
+  , div(
+    class = 'padding-bottom-1'
+    , 'Average from most recent 100 sales using these filters: '
+    , textOutput('salesAverage', inline=TRUE)
+  )
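+  # each column(3) spans a quarter of Bootstrap's 12-unit grid, so the rank
+  # inputs and attribute filters above wrap four per row; the table below
+  # shows the rows produced by getSalesData() under those filters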
   , div(class = "table", reactableOutput("salestable"))
   , div(class = "description", 'This app is still in beta - sales data may be incomplete or delayed')
 )
diff --git a/viz/www/styles.css b/viz/www/styles.css
index f3b65bd0..1ec00e2d 100644
--- a/viz/www/styles.css
+++ b/viz/www/styles.css
@@ -232,6 +232,32 @@ input[type=number] {
 }
 
+/*******************/
+/* Filters */
+/*******************/
+.filters > div > .inputtitle {
+  font-weight: 100;
+  font-size: 1.25rem;
+  padding: 0;
+  margin: 0;
+}
+.filters > div > .row > div > .form-group {
+  padding: 0 1rem 1rem 0;
+  margin: 0;
+}
+.filters .form-control, .filters .selectize-input > *, .filters .selectize-dropdown > * {
+  font-weight: 100;
+  font-size: 1.25rem;
+}
+.filters > div {
+  padding: 0;
+  margin: 0;
+}
+
+.filters > div:first-child {
+  padding-left: 0;
+}
+
 /***********************/
 /* React Table */
 /***********************/
@@ -298,6 +324,9 @@ tr {
 /*******************/
 /* General */
 /*******************/
+.padding-bottom-1 {
+  padding-bottom: 1rem;
+}
 .row {
   margin: 0;
 }