diff --git a/format_data.py b/format_data.py
index ff1a4bfb..d69dd94b 100644
--- a/format_data.py
+++ b/format_data.py
@@ -3,6 +3,7 @@ import os
 import math
 import json
 import pandas as pd
+from scrape_sol_nfts import clean_name
 import snowflake.connector
 
 os.chdir('/Users/kellenblumberg/git/nft-deal-score')
@@ -235,6 +236,80 @@ def levana():
         with open('./data/metadata/levana_dragon_eggs/{}.txt'.format(i), 'w') as outfile:
             outfile.write(json.dumps(newd))
 
+def solana():
+    mints = pd.read_csv('./data/solana_rarities.csv')
+    collection_info = pd.read_csv('./data/collection_info.csv')
+    metadata = pd.read_csv('./data/metadata.csv')
+    tokens = pd.read_csv('./data/tokens.csv')
+    tokens['token_id'] = tokens.token_id.astype(str)
+    metadata['token_id'] = metadata.token_id.astype(str)
+    metadata = metadata.merge(tokens)
+    metadata = metadata.merge(collection_info)
+    metadata['token_id'] = metadata.clean_token_id.fillna(metadata.token_id)
+    metadata = metadata[-metadata.feature_name.isin(['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'])]
+
+    metadata['token_id'] = metadata.token_id.astype(int)
+    mints['token_id'] = mints.token_id.astype(int)
+    mints['collection'] = mints.collection.apply(lambda x: clean_name(x) )
+
+    # metadata[['collection']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False)
+
+    # NB: intentionally starts from the third collection in the unique() ordering
+    for collection in metadata.collection.unique()[2:]:
+        print(collection)
+        mdf = metadata[metadata.collection == collection]
+        results = []
+        for token_id in mdf.token_id.unique():
+            cur = mdf[mdf.token_id == token_id]
+            token_metadata = {}
+            m = mints[(mints.collection == collection) & (mints.token_id == token_id) ]
+            if not len(m):
+                print(token_id)
+                continue
+            mint_address = m.mint_address.values[0]
+            for row in cur.iterrows():
+                row = row[1]
+                token_metadata[row['feature_name']] = row['feature_value']
+
+            # `row` holds the last metadata row; the collection-level fields
+            # used below are identical across rows for a given token
+            d = {
+                'commission_rate': None
+                , 'mint_address': mint_address
+                , 'contract_address': row['contract_address']
+                , 'contract_name': row['collection']
+                , 'created_at_block_id': 0
+                , 'created_at_timestamp': str(row['created_at_timestamp'])
+                , 'created_at_tx_id': ''
+                , 'creator_address': row['contract_address']
+                , 'creator_name': row['collection']
+                , 'image_url': row['image_url']
+                , 'project_name': row['collection']
+                , 'token_id': int(token_id)
+                , 'token_metadata': token_metadata
+                , 'token_metadata_uri': row['image_url']
+                , 'token_name': row['collection']
+            }
+            results.append(d)
+        print('Uploading {} results'.format(len(results)))
+
+        # write the results out in batches of n records per file
+        n = 50
+        r = math.ceil(len(results) / n)
+        for i in range(r):
+            newd = {
+                "model": {
+                    "blockchain": "solana",
+                    "sinks": [
+                        {
+                            "destination": "{database_name}.silver.nft_metadata",
+                            "type": "snowflake",
+                            "unique_key": "blockchain || contract_address || token_id"
+                        }
+                    ],
+                },
+                "results": results[(i * n):((i + 1) * n)]
+            }
+            with open('./data/metadata/{}/{}.txt'.format(collection, i), 'w') as outfile:
+                outfile.write(json.dumps(newd))
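+# Illustrative sketch (not part of the upload logic above): each batch file
+# written by solana() has this shape, with {database_name} left as a literal
+# placeholder (presumably resolved by the downstream loader):
+#   {
+#     "model": {
+#       "blockchain": "solana",
+#       "sinks": [{"destination": "{database_name}.silver.nft_metadata",
+#                  "type": "snowflake",
+#                  "unique_key": "blockchain || contract_address || token_id"}]
+#     },
+#     "results": [ {"mint_address": "...", "token_id": 0, "token_metadata": {...}, ...} ]
+#   }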
 def bayc():
     with open('./data/bayc.json') as f:
         j = json.load(f)
diff --git a/prepare_data.py b/prepare_data.py
index 57fe18f6..cc48f5b7 100644
--- a/prepare_data.py
+++ b/prepare_data.py
@@ -50,7 +50,7 @@ for c in m_df.collection.unique():
 print(m_df[(m_df.token_id=='1') & (m_df.collection == 'Solana Monkey Business')])
 print(m_df[(m_df.token_id=='10') & (m_df.collection == 'Aurory')])
 
-for c in [ 'nft_rank','adj_nft_rank_0','adj_nft_rank_1' ]:
+for c in [ 'nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2' ]:
     cur = rarities[[ 'collection','token_id',c ]].rename(columns={c: 'feature_value'})
     cur['feature_name'] = c
     m_df = m_df[ m_df.feature_name != c ]
diff --git a/scrape_sol_nfts.py b/scrape_sol_nfts.py
index 26523426..2b70f5a8 100644
--- a/scrape_sol_nfts.py
+++ b/scrape_sol_nfts.py
@@ -17,6 +17,19 @@ from selenium.webdriver.common.keys import Keys
 os.chdir('/Users/kellenblumberg/git/nft-deal-score')
 os.environ['PATH'] += os.pathsep + '/Users/kellenblumberg/shared/'
 
+# Updates
+# - Final updates to the NTR app
+# - Helped gather mint_address data and metadata for the Solana hackathon
+# - Updated the NFT Deal Score model to make it easy to add new collections
+# Accomplishments
+# - Version 1.0 of the NTR app is now live at https://rstudio-connect.flipside.kitchen/ntr/ thanks to @eric
+# Problems Encountered
+# - Still waiting for Harmony data to be released (was hoping it would be ready early this week)
+# Priorities
+# - Assist with Solana <3 week where needed ()
+# - Build DeFi Kingdoms query
+# Concerns
+
 # browser = webdriver.Chrome()
 
 # old = pd.read_csv('./data/tokens.csv')
@@ -380,9 +393,31 @@ def scrape_recent_sales():
         del o_sales['tmp']
     o_sales.to_csv('./data/sales.csv', index=False)
 
+def scrape_solanafloor():
+    browser.get('https://solanafloor.com/')
+    soup = BeautifulSoup(browser.page_source)
+    # the stats table is an ag-grid: the pinned-left columns hold the project
+    # link, the center columns hold the stats
+    d0 = soup.find_all('div', class_='ag-pinned-left-cols-container')
+    d1 = soup.find_all('div', class_='ag-center-cols-clipper')
+    d0 = d0[1]
+    d1 = d1[1]
+    rows0 = d0.find_all('div', class_='ag-row')
+    rows1 = d1.find_all('div', class_='ag-row')
+    data = []
+    for r in rows1:
+        cell1 = r.find_all('div', class_='ag-cell')
+        a = cell1[0].find_all('a')[0]
+        project = re.split('/', a.attrs['href'])[-1]
+        data += [[ project, int('Lite' in cell1[0].text) ]]
+    df = pd.DataFrame(data, columns=['project','is_lite'])
+    df.to_csv('./data/sf_projects.csv', index=False)
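+# Hedged note on ./data/sf_projects.csv: the scrape above only writes the
+# `project` and `is_lite` columns, while scrape_listings() below filters on
+# `collection` and `to_scrape` -- those columns are presumably renamed/added
+# by hand (or by another step) before scrape_listings() runs, e.g.:
+#   collection,is_lite,to_scrape
+#   degen-ape-academy,0,1
+#   pesky-penguins,0,1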
 
 def scrape_listings(browser, collections = [ 'aurory','thugbirdz','smb','degenapes','peskypenguinclub' ], alerted = [], is_listings = True):
     print('Scraping solanafloor listings...')
     data = []
+    m_data = []
     # collections = [ 'aurory','thugbirdz','meerkatmillionaires','aurory','degenapes' ]
     # collections = [ 'aurory','thugbirdz','smb','degenapes' ]
     # collections = [ 'smb' ]
@@ -391,7 +426,11 @@ def scrape_listings(browser, collections = [ 'aurory','thugbirdz','smb','degenap
     , 'degenapes': 'degen-ape-academy'
     , 'peskypenguinclub': 'pesky-penguins'
     }
-    collection = 'smb'
+    # scrape every flagged, non-lite solanafloor project with no rarities yet
+    sf_projects = pd.read_csv('./data/sf_projects.csv')
+    old = pd.read_csv('./data/solana_rarities.csv')
+    collections = sf_projects[(sf_projects.to_scrape==1) & (sf_projects.is_lite==0) & (-sf_projects.collection.isin(old.collection.unique()))].collection.unique()
     for collection in collections:
         if collection == 'boryokudragonz':
             continue
@@ -410,6 +449,7 @@ def scrape_listings(browser, collections = [ 'aurory','thugbirdz','smb','degenap
             page += 1
         for j in [20, 30, 30, 30, 30, 30, 30, 30] * 1:
             for _ in range(1):
                 soup = BeautifulSoup(browser.page_source)
                 # for row in browser.find_elements_by_class_name('ag-row'):
                 #     cells = row.find_elements_by_class_name('ag-cell')
                 #     data += [[ collection, token_id, price ]]
                 d0 = soup.find_all('div', class_='ag-pinned-left-cols-container')
                 d1 = soup.find_all('div', class_='ag-center-cols-clipper')
+                h1 = soup.find_all('div', class_='ag-header-row')
                 if not len(d0) or not len(d1):
                     continue
                 d0 = d0[0]
                 d1 = d1[0]
+                h1 = h1[1]
                 rows0 = d0.find_all('div', class_='ag-row')
                 rows1 = d1.find_all('div', class_='ag-row')
+                # column headers, used to label the metadata cells below
+                hs1 = h1.find_all('div', class_='ag-header-cell')
+                hs1 = [ x.text.strip() for x in hs1 ]
                 for k in range(len(rows0)):
                     # for row in soup.find_all('div', class_='ag-row'):
                     #     # print(row.text)
@@ -432,6 +476,7 @@ def scrape_listings(browser, collections = [ 'aurory','thugbirdz','smb','degenap
                     cell1 = rows1[k].find_all('div', class_='ag-cell')
                     if len(cell1) > 2:
                         token_id = cell0[0].text
+                        mint_address = re.split('/', cell0[0].find_all('a')[0].attrs['href'])[-1] if len(cell0[0].find_all('a')) else None
                         price = cell1[2 if is_listings else 0].text
                         if len(token_id) and len(price):
                             # token_id = int(token_id[0].text)
@@ -443,7 +488,12 @@ def scrape_listings(browser, collections = [ 'aurory','thugbirdz','smb','degenap
                             if not price and is_listings:
                                 continue
                             if not token_id in seen:
-                                data += [[ collection, token_id, price ]]
+                                if not is_listings:
+                                    data += [[ collection, token_id, mint_address, price ]]
+                                    for l in range(len(hs1)):
+                                        m_data += [[ collection, token_id, mint_address, hs1[l], cell1[l].text.strip() ]]
+                                else:
+                                    data += [[ collection, token_id, price ]]
                                 seen.append(token_id)
                             # else:
                             #     print(row.text)
@@ -459,12 +509,25 @@ def scrape_listings(browser, collections = [ 'aurory','thugbirdz','smb','degenap
             else:
                 has_more = False
                 break
-    if not is_listings:
-        old = pd.read_csv('./data/solana_rarities.csv')
-        rarities = pd.DataFrame(data, columns=['collection','token_id','nft_rank']).drop_duplicates()
-        rarities = rarities.append(old).drop_duplicates()
-        print(rarities.groupby('collection').token_id.count())
-        rarities.to_csv('./data/solana_rarities.csv', index=False)
+    if not is_listings:
+        # build the metadata table first: it defines `rem`, the collections
+        # with too few scraped tokens, which is also needed to filter rarities
+        old = pd.read_csv('./data/sf_metadata.csv')
+        metadata = pd.DataFrame(m_data, columns=['collection','token_id','mint_address','feature_name','feature_value']).drop_duplicates()
+        metadata = metadata[ -metadata.feature_name.isin(['Rank *','Owner','Listed On','Price','USD','Buy Link']) ]
+        metadata = metadata.append(old).drop_duplicates()
+        g = metadata[[ 'collection','token_id' ]].drop_duplicates().groupby('collection').token_id.count().reset_index().sort_values('token_id')
+        rem = g[g.token_id<99].collection.unique()
+        metadata = metadata[-metadata.collection.isin(rem)]
+        print(g)
+        metadata.to_csv('./data/sf_metadata.csv', index=False)
+
+        old = pd.read_csv('./data/solana_rarities.csv')
+        rarities = pd.DataFrame(data, columns=['collection','token_id','mint_address','nft_rank']).drop_duplicates()
+        rarities = rarities.append(old).drop_duplicates()
+        rarities = rarities[-rarities.collection.isin(rem)]
+        print(rarities.groupby('collection').token_id.count().reset_index().sort_values('token_id'))
+        rarities.to_csv('./data/solana_rarities.csv', index=False)
 
     old = pd.read_csv('./data/listings.csv')
     listings = pd.DataFrame(data, columns=['collection','token_id','price']).drop_duplicates()
@@ -894,6 +957,108 @@ def scratch():
     o_sales.head()
     o_sales.to_csv('./data/md_sales.csv', index=False)
 
+def create_mint_csv():
+    # relies on the module-level dict `d` mapping collection -> update_authority;
+    # mirrors the snapshot-parsing logic in scrape_mints below
+    mints = pd.DataFrame()
+    auth_to_mint = {}
+    for collection, update_authority in d.items():
+        auth_to_mint[update_authority] = collection
+    for fname in [ './data/mints/'+f for f in os.listdir('./data/mints') ]:
+        if not '.json' in fname:
+            continue
+        with open(fname, 'r') as f:
+            j = json.load(f)
+        cur = pd.DataFrame(j)
+        if len(cur):
+            cur.columns = ['mint_address']
+            cur['update_authority'] = re.split('/|_', fname)[3]
+            cur['collection'] = cur.update_authority.apply(lambda x: auth_to_mint[x] )
+            mints = mints.append(cur)
+    mints.to_csv('./data/solana_mints.csv', index=False)
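+# Hedged note: `metaboss snapshot mints` appears to write one JSON array of
+# mint-address strings per update authority, named like
+#   ./data/mints/<UPDATE_AUTHORITY>_mint_accounts.json
+# which is why re.split('/|_', fname)[3] recovers the authority, e.g.:
+#   >>> re.split('/|_', './data/mints/DRGNjvBvnXNiQz9dTppGk1tAsVxtJsvhEmojEfBU3ezf_mint_accounts.json')[3]
+#   'DRGNjvBvnXNiQz9dTppGk1tAsVxtJsvhEmojEfBU3ezf'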
+
+def scrape_how_rare_is():
+    d = {
+        'degenapes': 40
+        ,'aurory': 40
+    }
+    data = []
+    for collection, num_pages in d.items():
+        for page in range(num_pages):
+            if len(data):
+                print(data[-1])
+            url = 'https://howrare.is/{}/?page={}&ids=&sort_by=rank'.format(collection, page)
+            browser.get(url)
+            sleep(0.1)
+            soup = BeautifulSoup(browser.page_source)
+            for div in soup.find_all('div', class_='featured_item_img'):
+                image_url = div.find_all('img')[0].attrs['src']
+                token_id = re.split('/', div.find_all('a')[0].attrs['href'])[-2]
+                data += [[ collection, token_id, image_url ]]
+    df = pd.DataFrame(data, columns=['collection','token_id','image_url'])
+    df['collection'] = df.collection.apply(lambda x: clean_name(x) )
+    df['clean_token_id'] = df.token_id
+    df['chain'] = 'Solana'
+    tokens = pd.read_csv('./data/tokens.csv')
+    tokens = tokens[-tokens.collection.isin(df.collection.unique())]
+    tokens = tokens.append(df)
+    tokens.to_csv('./data/tokens.csv', index=False)
+
+
+def scrape_mints():
+    nft_mint_addresses = pd.read_csv('./data/nft_mint_addresses.csv')
+    nft_mint_addresses['collection'] = nft_mint_addresses.collection.apply(lambda x: clean_name(x) )
+
+    solana_nfts = pd.read_csv('./data/solana_nfts.csv')
+    solana_nfts = solana_nfts[solana_nfts.update_authority.notnull()]
+    solana_nfts = solana_nfts[solana_nfts.collection != 'Boryoku Baby Dragonz']
+    print(solana_nfts.groupby('update_authority').collection.count().reset_index().sort_values('collection', ascending=0).head(10))
+
+    nft_mint_addresses = nft_mint_addresses.merge( solana_nfts )
+    mints = pd.read_csv('./data/solana_mints.csv')
+    mints = mints[-mints.collection.isin(nft_mint_addresses.collection.unique())]
+    mints = mints.append(nft_mint_addresses[list(mints.columns)])
+    seen = list(mints.update_authority.unique())
+    rpc = 'https://red-cool-wildflower.solana-mainnet.quiknode.pro/a1674d4ab875dd3f89b34863a86c0f1931f57090/'
+    d = {}
+    for row in solana_nfts.iterrows():
+        row = row[1]
+        d[row['collection']] = row['update_authority']
+
+    # snapshot mint addresses for any collection we have not covered yet
+    remaining = sorted(solana_nfts[-solana_nfts.collection.isin(mints.collection.unique())].collection.unique())
+    print('{}'.format(len(remaining)))
+    for collection in remaining:
+        update_authority = d[collection]
+        if update_authority in seen or collection in [ 'Solana Monkey Business','Thugbirdz','Degenerate Ape Academy','Pesky Penguins','Aurory' ]:
+            print('Seen '+collection)
+            continue
+        else:
+            print('Working on '+collection)
+            sleep(.10 * 60)
+            os.system('metaboss -r {} -t 300 snapshot mints --update-authority {} --output ~/git/nft-deal-score/data/mints '.format(rpc, update_authority))
+
+    # rebuild the mints table from the snapshot JSON files
+    mints = pd.DataFrame()
+    auth_to_mint = {}
+    for collection, update_authority in d.items():
+        auth_to_mint[update_authority] = collection
+    for fname in [ './data/mints/'+f for f in os.listdir('./data/mints') ]:
+        if not '.json' in fname:
+            continue
+        with open(fname, 'r') as f:
+            j = json.load(f)
+        cur = pd.DataFrame(j)
+        if len(cur):
+            cur.columns = ['mint_address']
+            cur['update_authority'] = re.split('/|_', fname)[3]
+            cur['collection'] = cur.update_authority.apply(lambda x: auth_to_mint[x] )
+            mints = mints.append(cur)
+    g = mints.groupby('collection').update_authority.count().reset_index()
+    g.to_csv('~/Downloads/tmp.csv', index=False)
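+    # sanity check (illustrative, commented out): every update authority should
+    # map to exactly one collection before overwriting the CSV
+    # assert mints.groupby('update_authority').collection.nunique().max() == 1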
mints.to_csv('./data/solana_mints.csv', index=False) + # scrape_listings(['smb']) # alerted = [] # for i in range(1): diff --git a/scripts/solana-rpc-app/src/index.js b/scripts/solana-rpc-app/src/index.js new file mode 100644 index 00000000..dde2c5d7 --- /dev/null +++ b/scripts/solana-rpc-app/src/index.js @@ -0,0 +1,10 @@ +"use strict"; +exports.__esModule = true; +exports.hello = void 0; +var world = 'world'; +function hello(world) { + if (world === void 0) { world = 'world'; } + return "Hello ".concat(world, "! "); +} +exports.hello = hello; +console.log("Hello!"); diff --git a/scripts/solana-rpc-app/src/index.ts b/scripts/solana-rpc-app/src/index.ts new file mode 100644 index 00000000..a9e25c35 --- /dev/null +++ b/scripts/solana-rpc-app/src/index.ts @@ -0,0 +1,117 @@ +import { Connection, clusterApiUrl, PublicKey } from '@solana/web3.js'; +// import bs58 from 'bs58'; + +const connection = new Connection(clusterApiUrl('mainnet-beta')); +const MAX_NAME_LENGTH = 32; +const MAX_URI_LENGTH = 200; +const MAX_SYMBOL_LENGTH = 10; +const MAX_CREATOR_LEN = 32 + 1 + 1; +const MAX_CREATOR_LIMIT = 5; +const MAX_DATA_SIZE = 4 + MAX_NAME_LENGTH + 4 + MAX_SYMBOL_LENGTH + 4 + MAX_URI_LENGTH + 2 + 1 + 4 + MAX_CREATOR_LIMIT * MAX_CREATOR_LEN; +const MAX_METADATA_LEN = 1 + 32 + 32 + MAX_DATA_SIZE + 1 + 1 + 9 + 172; +const CREATOR_ARRAY_START = 1 + 32 + 32 + 4 + MAX_NAME_LENGTH + 4 + MAX_URI_LENGTH + 4 + MAX_SYMBOL_LENGTH + 2 + 1 + 4; + +console.log(`MAX_METADATA_LEN = ${MAX_METADATA_LEN}`); + + +// const TOKEN_METADATA_PROGRAM = new PublicKey('cndy3Z4yapfJBmL3ShUp5exZKqR3z33thTzeNMm2gRZ'); +const candyMachineId = new PublicKey('trshC9cTgL3BPXoAbp5w9UfnUMWEJx5G61vUijXPMLH'); + +// const getMintAddresses = async (firstCreatorAddress: PublicKey) => { +// const metadataAccounts = await connection.getProgramAccounts( +// TOKEN_METADATA_PROGRAM, +// { +// // The mint address is located at byte 33 and lasts for 32 bytes. +// dataSlice: { offset: 33, length: 32 }, + +// filters: [ +// // Only get Metadata accounts. +// { dataSize: MAX_METADATA_LEN }, + +// // Filter using the first creator. 
+//       {
+//         memcmp: {
+//           offset: 1,
+//           bytes: firstCreatorAddress.toBase58(),
+//         },
+//       },
+//     ],
+//   },
+// );
+
+// return metadataAccounts.map((metadataAccountInfo) => (
+//   // bs58.encode(metadataAccountInfo.account.data)
+//   (metadataAccountInfo.account.data)
+// ));
+// };
+
+// getMintAddresses(candyMachineId);
+
+
+
+
+// import { Connection, clusterApiUrl, PublicKey } from '@solana/web3.js';
+// import bs58 from 'bs58';
+
+// const connection = new Connection(clusterApiUrl('mainnet-beta'));
+// const MAX_NAME_LENGTH = 32;
+// const MAX_URI_LENGTH = 200;
+// const MAX_SYMBOL_LENGTH = 10;
+// const MAX_CREATOR_LEN = 32 + 1 + 1;
+// const MAX_CREATOR_LIMIT = 5;
+// const MAX_DATA_SIZE = 4 + MAX_NAME_LENGTH + 4 + MAX_SYMBOL_LENGTH + 4 + MAX_URI_LENGTH + 2 + 1 + 4 + MAX_CREATOR_LIMIT * MAX_CREATOR_LEN;
+// const MAX_METADATA_LEN = 1 + 32 + 32 + MAX_DATA_SIZE + 1 + 1 + 9 + 172;
+// const CREATOR_ARRAY_START = 1 + 32 + 32 + 4 + MAX_NAME_LENGTH + 4 + MAX_URI_LENGTH + 4 + MAX_SYMBOL_LENGTH + 2 + 1 + 4;
+
+const TOKEN_METADATA_PROGRAM = new PublicKey('metaqbxxUerdq28cj1RbAWkYQm3ybzjb6a8bt518x1s');
+const CANDY_MACHINE_V2_PROGRAM = new PublicKey('cndy3Z4yapfJBmL3ShUp5exZKqR3z33thTzeNMm2gRZ');
+// const candyMachineId = new PublicKey('ENTER_YOUR_CANDY_MACHINE_ID_HERE');
+
+const getMintAddresses = async () => {
+  const metadataAccounts = await connection.getProgramAccounts(
+    // TOKEN_METADATA_PROGRAM,
+    // NB: this queries the SPL Token program, so the accounts returned are
+    // token accounts, not Metaplex metadata accounts
+    new PublicKey('TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA'),
+    {
+      // The mint address is located at byte 33 and lasts for 32 bytes.
+      // dataSlice: { offset: 33, length: 32 },
+
+      filters: [
+        // SPL Token accounts are 165 bytes
+        { dataSize: 165 },
+
+        // Filter on the pubkey at byte offset 1. NB: in a token account the
+        // mint occupies bytes 0-31 and the owner bytes 32-63, so offset 1
+        // does not align with either field.
+        {
+          memcmp: {
+            offset: 1,
+            bytes: new PublicKey('trshC9cTgL3BPXoAbp5w9UfnUMWEJx5G61vUijXPMLH').toBase58(),
+          },
+        },
+      ],
+    },
+  );
+  return metadataAccounts;
+
+  // unreachable after the return above; kept for reference
+  // return metadataAccounts.map((metadataAccountInfo) => (
+  //   // bs58.encode(metadataAccountInfo.account.data)
+  //   (metadataAccountInfo.account.data)
+  // ));
+};
+
+const getCandyMachineCreator = async (candyMachine: PublicKey): Promise<[PublicKey, number]> => (
+  PublicKey.findProgramAddress(
+    [Buffer.from('candy_machine'), candyMachine.toBuffer()],
+    CANDY_MACHINE_V2_PROGRAM,
+  )
+);
+
+(async () => {
+
+// const candyMachineCreator = await getCandyMachineCreator(candyMachineId);
+// console.log(`candyMachineCreator`);
+// console.log(candyMachineCreator.toString());
+
+  const a = await getMintAddresses();
+  console.log(`a`);
+  console.log(a);
+
+})();
\ No newline at end of file
diff --git a/scripts/solana-rpc-app/src/test.ts b/scripts/solana-rpc-app/src/test.ts
new file mode 100644
index 00000000..3a0ec6d9
--- /dev/null
+++ b/scripts/solana-rpc-app/src/test.ts
@@ -0,0 +1,30 @@
+import { Connection } from '@metaplex/js';
+import { Metadata } from '@metaplex-foundation/mpl-token-metadata';
+import { PublicKey } from '@solana/web3.js';
+
+(async () => {
+  const connection = new Connection('mainnet-beta');
+// const tokenMint = '9ARngHhVaCtH5JFieRdSS5Y8cdZk2TMF4tfGSWFB9iSK';
+  const tokenMint = '5XKoz4nuPFU78jcEVREMZoh9kKsYnCvrTAmpRzvVdJp1';
+  const metadataPDA = await Metadata.getPDA(new PublicKey(tokenMint));
+// Metadata.getCandyMachineCreator()
+// Metadata.getPDA()
+  const tokenMetadata = await Metadata.load(connection, metadataPDA);
+  console.log(tokenMetadata.data);
+  /*
+  MetadataData {
+    key: 4,
+    updateAuthority: '9uBX3ASjxWvNBAD1xjbVaKA74mWGZys3RGSF7DdeDD3F',
+    mint: '9ARngHhVaCtH5JFieRdSS5Y8cdZk2TMF4tfGSWFB9iSK',
+    data: MetadataDataData {
name: 'SMB #1355', + symbol: 'SMB', + uri: 'https://arweave.net/3wXyF1wvK6ARJ_9ue-O58CMuXrz5nyHEiPFQ6z5q02E', + sellerFeeBasisPoints: 500, + creators: [ [Creator] ] + }, + primarySaleHappened: 1, + isMutable: 1 + } + */ +})(); \ No newline at end of file diff --git a/scripts/solana-rpc-app/src/test2.ts b/scripts/solana-rpc-app/src/test2.ts new file mode 100644 index 00000000..d1d73e0f --- /dev/null +++ b/scripts/solana-rpc-app/src/test2.ts @@ -0,0 +1,57 @@ +import { Connection, clusterApiUrl, PublicKey } from '@solana/web3.js'; +// import bs58 from 'bs58'; + +const connection = new Connection(clusterApiUrl('mainnet-beta')); +const MAX_NAME_LENGTH = 32; +const MAX_URI_LENGTH = 200; +const MAX_SYMBOL_LENGTH = 10; +const MAX_CREATOR_LEN = 32 + 1 + 1; +const MAX_CREATOR_LIMIT = 5; +const MAX_DATA_SIZE = 4 + MAX_NAME_LENGTH + 4 + MAX_SYMBOL_LENGTH + 4 + MAX_URI_LENGTH + 2 + 1 + 4 + MAX_CREATOR_LIMIT * MAX_CREATOR_LEN; +const MAX_METADATA_LEN = 1 + 32 + 32 + MAX_DATA_SIZE + 1 + 1 + 9 + 172; +const CREATOR_ARRAY_START = 1 + 32 + 32 + 4 + MAX_NAME_LENGTH + 4 + MAX_URI_LENGTH + 4 + MAX_SYMBOL_LENGTH + 2 + 1 + 4; + +// const TOKEN_METADATA_PROGRAM = new PublicKey('metaqbxxUerdq28cj1RbAWkYQm3ybzjb6a8bt518x1s'); +const TOKEN_METADATA_PROGRAM = new PublicKey('TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA'); +const candyMachineId = new PublicKey('8mNmf15xNrMFQLNSNrHxxswy7a1NfaSFwXHkVUPeMWwU'); + +const getMintAddresses = async (firstCreatorAddress: PublicKey) => { + const metadataAccounts = await connection.getProgramAccounts( + TOKEN_METADATA_PROGRAM, + { + // The mint address is located at byte 33 and lasts for 32 bytes. + // dataSlice: { offset: 33, length: 32 }, + + filters: [ + // Only get Metadata accounts. + // { dataSize: MAX_METADATA_LEN }, + { dataSize: 165 }, + + // Filter using the first creator. + { + memcmp: { + // offset: CREATOR_ARRAY_START, + // bytes: firstCreatorAddress.toBase58(), + offset: 1, + bytes: new PublicKey('4FYjfa71puV4PD12cyqXotu6z2FhLiqFSHjEfYiFLnbj').toBase58(), + }, + }, + ], + }, + ); + return metadataAccounts; + +// return metadataAccounts.map((metadataAccountInfo) => ( +// bs58.encode(metadataAccountInfo.account.data) +// )); +}; + + +(async () => { + + const a = await getMintAddresses(candyMachineId); + console.log(`a`); + console.log(a); + console.log(a.length); + + })(); \ No newline at end of file diff --git a/scripts/solana-rpc-app/tsconfig.json b/scripts/solana-rpc-app/tsconfig.json new file mode 100644 index 00000000..b6152daf --- /dev/null +++ b/scripts/solana-rpc-app/tsconfig.json @@ -0,0 +1,101 @@ +{ + "compilerOptions": { + /* Visit https://aka.ms/tsconfig.json to read more about this file */ + + /* Projects */ + // "incremental": true, /* Enable incremental compilation */ + // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */ + // "tsBuildInfoFile": "./", /* Specify the folder for .tsbuildinfo incremental compilation files. */ + // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects */ + // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */ + // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ + + /* Language and Environment */ + "target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. 
*/ + // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ + // "jsx": "preserve", /* Specify what JSX code is generated. */ + // "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */ + // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ + // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h' */ + // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ + // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using `jsx: react-jsx*`.` */ + // "reactNamespace": "", /* Specify the object invoked for `createElement`. This only applies when targeting `react` JSX emit. */ + // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ + // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ + + /* Modules */ + "module": "commonjs", /* Specify what module code is generated. */ + // "rootDir": "./", /* Specify the root folder within your source files. */ + // "moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */ + // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */ + // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ + // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ + // "typeRoots": [], /* Specify multiple folders that act like `./node_modules/@types`. */ + // "types": [], /* Specify type package names to be included without being referenced in a source file. */ + // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ + // "resolveJsonModule": true, /* Enable importing .json files */ + // "noResolve": true, /* Disallow `import`s, `require`s or ``s from expanding the number of files TypeScript should add to a project. */ + + /* JavaScript Support */ + // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the `checkJS` option to get errors from these files. */ + // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */ + // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from `node_modules`. Only applicable with `allowJs`. */ + + /* Emit */ + // "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ + // "declarationMap": true, /* Create sourcemaps for d.ts files. */ + // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ + "sourceMap": true, /* Create source map files for emitted JavaScript files. */ + // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If `declaration` is true, also designates a file that bundles all .d.ts output. */ + "outDir": "dist", /* Specify an output folder for all emitted files. */ + // "removeComments": true, /* Disable emitting comments. */ + // "noEmit": true, /* Disable emitting files from a compilation. */ + // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. 
*/ + // "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types */ + // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */ + // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */ + // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ + // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */ + // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */ + // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */ + // "newLine": "crlf", /* Set the newline character for emitting files. */ + // "stripInternal": true, /* Disable emitting declarations that have `@internal` in their JSDoc comments. */ + // "noEmitHelpers": true, /* Disable generating custom helper functions like `__extends` in compiled output. */ + // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ + // "preserveConstEnums": true, /* Disable erasing `const enum` declarations in generated code. */ + // "declarationDir": "./", /* Specify the output directory for generated declaration files. */ + // "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */ + + /* Interop Constraints */ + // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ + // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ + "esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables `allowSyntheticDefaultImports` for type compatibility. */ + // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ + "forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */ + + /* Type Checking */ + "strict": true, /* Enable all strict type-checking options. */ + // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied `any` type.. */ + // "strictNullChecks": true, /* When type checking, take into account `null` and `undefined`. */ + // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ + // "strictBindCallApply": true, /* Check that the arguments for `bind`, `call`, and `apply` methods match the original function. */ + // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */ + // "noImplicitThis": true, /* Enable error reporting when `this` is given the type `any`. */ + // "useUnknownInCatchVariables": true, /* Type catch clause variables as 'unknown' instead of 'any'. */ + // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ + // "noUnusedLocals": true, /* Enable error reporting when a local variables aren't read. */ + // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read */ + // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ + // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. 
*/ + // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ + // "noUncheckedIndexedAccess": true, /* Include 'undefined' in index signature results */ + // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ + // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type */ + // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ + // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ + + /* Completeness */ + // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ + "skipLibCheck": true /* Skip type checking all .d.ts files. */ + } +} diff --git a/solana_model.py b/solana_model.py index b85f8d9e..0f8357ad 100644 --- a/solana_model.py +++ b/solana_model.py @@ -2,10 +2,12 @@ import collections import os import re import json +from textwrap import indent import warnings import requests import numpy as np import pandas as pd +import kutils as ku import urllib.request import tensorflow as tf import snowflake.connector @@ -13,7 +15,7 @@ import snowflake.connector from curses import meta from copy import deepcopy from datetime import datetime -from sklearn.ensemble import RandomForestRegressor +from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor from sklearn.linear_model import LinearRegression, RidgeCV, Lasso, Ridge from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV @@ -27,649 +29,694 @@ warnings.filterwarnings('ignore') # Define Helper Functions # ################################### def standardize_df(df, cols, usedf=None, verbose=False): - for c in cols: - if type(usedf) != type(pd.DataFrame()): - usedf = df - mu = usedf[c].mean() - sd = usedf[c].std() - if verbose: - print(c) - if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0: - # df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 ) - df['std_{}'.format(c)] = df[c] - else: - df['std_{}'.format(c)] = (df[c] - mu) / sd - return(df) + for c in cols: + if type(usedf) != type(pd.DataFrame()): + usedf = df + mu = usedf[c].mean() + sd = usedf[c].std() + if verbose: + print(c) + if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0: + # df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 ) + df['std_{}'.format(c)] = df[c] + else: + df['std_{}'.format(c)] = (df[c] - mu) / sd + return(df) -def merge(left, right, on=None, how='inner', ensure=True, verbose=True): - df = left.merge(right, on=on, how=how) - if len(df) != len(left) and (ensure or verbose): - print('{} -> {}'.format(len(left), len(df))) - cur = left.merge(right, on=on, how='left') - cols = set(right.columns).difference(set(left.columns)) - print(cols) - col = list(cols)[0] - missing = cur[cur[col].isnull()] - print(missing.head()) - if ensure: - assert(False) - return(df) +def merge(left, right, on=None, how='inner', ensure=True, verbose=True, message = ''): + df = left.merge(right, on=on, how=how) + if len(df) != len(left) and (ensure or verbose): + if message: + print(message) + print('{} -> {}'.format(len(left), len(df))) + cur = left.merge(right, on=on, how='left') + cols = set(right.columns).difference(set(left.columns)) + print(cols) + if ensure: + col = list(cols)[0] + missing = cur[cur[col].isnull()] + print(missing.head()) + assert(False) + return(df) def just_float(x): - x = 
re.sub('[^\d\.]', '', str(x)) - return(float(x)) + x = re.sub('[^\d\.]', '', str(x)) + return(float(x)) def calculate_percentages(df, cols=[]): - add_pct = not 'pct' in df.columns - if not len(cols): - cols = df.columns - if add_pct: - df['pct'] = 1 - for c in cols: - g = df[c].value_counts().reset_index() - g.columns = [ c, 'N' ] - col = '{}_pct'.format(c) - g[col] = g.N / g.N.sum() - df = df.merge( g[[ c, col ]] ) - if add_pct: - df['pct'] = df.pct * df[col] - return(df) + add_pct = not 'pct' in df.columns + if not len(cols): + cols = df.columns + if add_pct: + df['pct'] = 1 + for c in cols: + g = df[c].value_counts().reset_index() + g.columns = [ c, 'N' ] + col = '{}_pct'.format(c) + g[col] = g.N / g.N.sum() + df = df.merge( g[[ c, col ]] ) + if add_pct: + df['pct'] = df.pct * df[col] + return(df) def get_sales(check_exclude = True, exclude=[]): - s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'}) - s_df['token_id'] = s_df.token_id.astype(str) - s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x)) - s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])] - s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ] - s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]] - for e in exclude: - s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )] - s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ] + s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'}) + s_df['token_id'] = s_df.token_id.astype(str) + s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x)) + s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])] + s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ] + s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]] + for e in exclude: + s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )] + s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ] - # exclude wierd data points - if not check_exclude: - exclude = pd.read_csv('./data/exclude.csv') - exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x)) - exclude['token_id'] = exclude.token_id.astype(str) - s_df = s_df.merge(exclude, how='left') - s_df = s_df[s_df.exclude.isnull()] - del s_df['exclude'] + # exclude wierd data points + if not check_exclude: + exclude = pd.read_csv('./data/exclude.csv') + exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x)) + exclude['token_id'] = exclude.token_id.astype(str) + s_df = s_df.merge(exclude, how='left') + s_df = s_df[s_df.exclude.isnull()] + del s_df['exclude'] - ########################### - # Calculate Floor # - ########################### - s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') ) - s_df['timestamp'] = s_df.block_timestamp.astype(int) - s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) + ########################### + # Calculate Floor # + ########################### + s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') ) + s_df['timestamp'] = s_df.block_timestamp.astype(int) + s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: 
(datetime.today() - x).days ).astype(int) - # lowest price in last 20 sales - s_df = s_df.sort_values(['collection','block_timestamp']) - s_df['mn_20'] = s_df.groupby('collection').price.shift(1) - s_df = s_df.sort_values(['collection','block_timestamp']) - s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) + # lowest price in last 20 sales + s_df = s_df.sort_values(['collection','block_timestamp']) + s_df['mn_20'] = s_df.groupby('collection').price.shift(1) + s_df = s_df.sort_values(['collection','block_timestamp']) + s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) - # exclude sales that are far below the existing floor - s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ] + # exclude sales that are far below the existing floor + s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ] - # 10%ile of last 20 sales - s_df = s_df.sort_values(['collection','block_timestamp']) - s_df['mn_20'] = s_df.groupby('collection').price.shift(1) - s_df = s_df.sort_values(['collection','block_timestamp']) - s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True) - s_df['sim'] = 0 - s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] ) - s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False) - return(s_df) + # 10%ile of last 20 sales + s_df = s_df.sort_values(['collection','block_timestamp']) + s_df['mn_20'] = s_df.groupby('collection').price.shift(1) + s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.0525).reset_index(0,drop=True) + s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.0525).reset_index(0,drop=True) + s_df['sim'] = 0 + s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] ) + s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False) + return(s_df) + +def get_coefs(cols, coef): + coefs = [] + for a, b in zip(cols, coef): + coefs += [[a,b]] + coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0) + # coefs.to_csv('~/Downloads/{}_lin_coefs.csv'.format(collection), index=False) + # coefs['tmp'] = coefs.col.apply(lambda x: 'nft_rank' in x ) + # coefs['mult'] = coefs.col.apply(lambda x: -1 if x == 'std_nft_rank' else 1 ) + coefs['mult'] = coefs.apply(lambda x: -1 if x['col'] == 'std_nft_rank' else 1 if x['coef'] >= 0 or 'adj_nft_rank' in x['col'] or 'is_top_' in x['col'] or 'y_pred_' in x['col'] else -1 , 1 ) + coefs['val'] = coefs.mult * coefs.coef + coefs = coefs.sort_values('val', ascending=0) + return(coefs) def train_model(check_exclude, supplement_with_listings): - exclude = [ - ( 'aurory', 2239, 3500 ) - ] - s_df = get_sales(check_exclude, exclude) - # s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'}) - # s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x)) - # s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])] - # s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ] - # s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]] - # for e in exclude: - # s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )] - # s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ] + exclude = [ + ( 'aurory', 2239, 3500 ) + , ( 'aurory', 1876, 
789 ) + , ( 'aurory', 2712, 500 ) + , ( 'aurory', 5368, 500 ) + , ( 'aurory', 9239, 1700 ) + ] + s_df = get_sales(check_exclude, exclude) + # s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'}) + # s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x)) + # s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])] + # s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ] + # s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]] + # for e in exclude: + # s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )] + # s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ] - # # exclude wierd data points - # if not check_exclude: - # exclude = pd.read_csv('./data/exclude.csv') - # exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x)) - # s_df = s_df.merge(exclude, how='left') - # s_df = s_df[s_df.exclude.isnull()] - # del s_df['exclude'] + # # exclude wierd data points + # if not check_exclude: + # exclude = pd.read_csv('./data/exclude.csv') + # exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x)) + # s_df = s_df.merge(exclude, how='left') + # s_df = s_df[s_df.exclude.isnull()] + # del s_df['exclude'] - ######################### - # Load Metadata # - ######################### - m_df = pd.read_csv('./data/metadata.csv') - m_df['token_id'] = m_df.token_id.astype(str) - m_df['collection'] = m_df.collection.apply(lambda x: clean_name(x)) - m_df['token_id'] = m_df.token_id.astype(str) - # remove ones that are not actually metadata - m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ] - m_df['feature_value'] = m_df.feature_value.apply(lambda x: re.split("\(", re.sub("\"", "", x))[0] if type(x)==str else x ) - m_df[(m_df.feature_name=='rank') & (m_df.collection == 'Levana Dragon Eggs')] - sorted(m_df[ (m_df.collection == 'Solana Monkey Business') ].feature_name.unique()) + ######################### + # Load Metadata # + ######################### + m_df = pd.read_csv('./data/metadata.csv') + # m_df[m_df.collection == 'Aurory'][['collection','feature_name']].drop_duplicates().to_csv('~/Downloads/tmp.csv', index=False) + sorted([x for x in m_df.feature_name.unique() if 'nft_' in x]) + m_df['token_id'] = m_df.token_id.astype(str) + m_df['collection'] = m_df.collection.apply(lambda x: clean_name(x)) + # remove ones that are not actually metadata + m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ] + m_df['feature_value'] = m_df.feature_value.apply(lambda x: re.split("\(", re.sub("\"", "", x))[0] if type(x)==str else x ) + m_df[(m_df.feature_name=='rank') & (m_df.collection == 'Levana Dragon Eggs')] + sorted(m_df[ (m_df.collection == 'Solana Monkey Business') ].feature_name.unique()) - ##################################### - # Exclude Special LunaBulls # - ##################################### - tokens = pd.read_csv('./data/tokens.csv') - tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x)) - tokens.token_id.unique() - lunabullsrem = tokens[tokens.clean_token_id>=10000].token_id.unique() - m_df = m_df[ -((m_df.collection == 'LunaBulls') & (m_df.token_id.isin(lunabullsrem))) ] - s_df = s_df[ -((s_df.collection == 'LunaBulls') & (s_df.token_id.isin(lunabullsrem))) ] - s_df = s_df.drop_duplicates(subset=['collection','token_id','price']) + ##################################### + # Exclude Special LunaBulls # + 
##################################### + tokens = pd.read_csv('./data/tokens.csv') + tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x)) + tokens['token_id'] = tokens.token_id.astype(str) + m_df = merge(m_df, tokens[['collection','token_id','clean_token_id']], how='left', ensure=True, on=['collection','token_id'], message='m_df x tokens') + m_df['token_id'] = m_df.clean_token_id.fillna(m_df.token_id).astype(int).astype(str) + s_df = merge(s_df, tokens[['collection','token_id','clean_token_id']], how='left', ensure=True, on=['collection','token_id'], message='s_df x tokens') + s_df['token_id'] = s_df.clean_token_id.fillna(s_df.token_id).astype(int).astype(str) + tokens.token_id.unique() + lunabullsrem = tokens[tokens.clean_token_id>=10000].token_id.unique() + m_df = m_df[ -((m_df.collection == 'LunaBulls') & (m_df.token_id.isin(lunabullsrem))) ] + s_df = s_df[ -((s_df.collection == 'LunaBulls') & (s_df.token_id.isin(lunabullsrem))) ] + s_df = s_df.drop_duplicates(subset=['collection','token_id','price']) - ########################### - # Calculate Floor # - ########################### - # s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') ) - # s_df['timestamp'] = s_df.block_timestamp.astype(int) - # s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) + ########################### + # Calculate Floor # + ########################### + # s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') ) + # s_df['timestamp'] = s_df.block_timestamp.astype(int) + # s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) - # # lowest price in last 20 sales - # s_df = s_df.sort_values(['collection','block_timestamp']) - # s_df['mn_20'] = s_df.groupby('collection').price.shift(1) - # s_df = s_df.sort_values(['collection','block_timestamp']) - # s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) + # # lowest price in last 20 sales + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = s_df.groupby('collection').price.shift(1) + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) - # # exclude sales that are far below the existing floor - # s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ] + # # exclude sales that are far below the existing floor + # s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ] - # # 10%ile of last 20 sales - # s_df = s_df.sort_values(['collection','block_timestamp']) - # s_df['mn_20'] = s_df.groupby('collection').price.shift(1) - # s_df = s_df.sort_values(['collection','block_timestamp']) - # s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True) - # s_df['sim'] = 0 - # s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] ) - # s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False) + # # 10%ile of last 20 sales + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = s_df.groupby('collection').price.shift(1) + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = 
s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True) + # s_df['sim'] = 0 + # s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] ) + # s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False) - if supplement_with_listings: - pred_price = pd.read_csv('./data/pred_price.csv') - pred_price['collection'] = pred_price.collection.apply(lambda x: clean_name(x)) - listings = pd.read_csv('./data/listings.csv') - listings['collection'] = listings.collection.apply(lambda x: clean_name(x)) - listings['block_timestamp'] = s_df.block_timestamp.max() - floor = s_df.sort_values('timestamp').groupby('collection').tail(1)[['collection','mn_20']] - tmp = merge(listings, pred_price, ensure=False) - tmp = tmp[tmp.price < tmp.pred_price] - tmp['timestamp'] = tmp.block_timestamp.astype(int) - tmp['days_ago'] = tmp.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) - tmp = merge(tmp, floor) + listings = pd.read_csv('./data/listings.csv') + if supplement_with_listings: + pred_price = pd.read_csv('./data/pred_price.csv') + pred_price['collection'] = pred_price.collection.apply(lambda x: clean_name(x)) + listings['collection'] = listings.collection.apply(lambda x: clean_name(x)) + listings['block_timestamp'] = s_df.block_timestamp.max() + listings = listings[listings.collection.isin(pred_price.collection.unique())] + floor = s_df.sort_values('timestamp').groupby('collection').tail(1)[['collection','mn_20']] + tmp = merge(listings, pred_price, ensure=False) + tmp = tmp[tmp.price < tmp.pred_price] + tmp['timestamp'] = tmp.block_timestamp.astype(int) + tmp['days_ago'] = tmp.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) + tmp = merge(tmp, floor) - n = round(len(s_df) / 5000) - n = max(1, min(2, n)) - # n = 1 - for _ in range(n): - s_df = s_df.append(tmp[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]]) - # tmp_1 = tmp[tmp.price <= 0.8 * tmp.pred_price] - # s_df = s_df.append(tmp_1[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]]) - # tmp_2 = tmp[tmp.price <= 0.6 * tmp.pred_price] - # tmp_2 = s_df.append(tmp_2[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]]) + n = round(len(s_df) / 5000) + n = max(1, min(3, n)) + print('Supplement with {}x listings'.format(n)) + # n = 1 + for _ in range(n): + s_df = s_df.append(tmp[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]]) + # tmp_1 = tmp[tmp.price <= 0.8 * tmp.pred_price] + # s_df = s_df.append(tmp_1[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]]) + # tmp_2 = tmp[tmp.price <= 0.6 * tmp.pred_price] + # tmp_2 = s_df.append(tmp_2[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]]) - ########################### - # Calculate Floor # - ########################### - coefsdf = pd.DataFrame() - salesdf = pd.DataFrame() - attributes = pd.DataFrame() - pred_price = pd.DataFrame() - feature_values = pd.DataFrame() - # non-binary in model: collection_rank, temperature, weight - # non-binary in model; exclude from rarity: pct, rank, score - # exclude from model: lucky_number, shower - # exclude from model and rarity %: meteor_id, attribute_count, cracking_date - ALL_NUMERIC_COLS = ['rank','score','pct'] - ALL_NUMERIC_COLS = ['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'] - MODEL_EXCLUDE_COLS = { - # 'Levana Dragon Eggs': 
['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature'] - 'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count','rarity_score_rank','rarity_score','weight'] - , 'Solana Monkey Business': ['Clothes_Diamond'] - } - MODEL_INCLUDE_COLS = { - # 'Solana Monkey Business': ['std_Hat_Strawhat','std_Hat_Space Warrior Hair','std_Clothes_Diamond','std_Eyes_Solana Vipers','std_Eyes_Vipers','std_Hat_Sombrero','std_Eyes_3D Glasses','std_Hat_Cowboy Hat','std_Eyes_Laser Eyes','std_matching_cop','std_matching_white','std_matching_black'] - 'Solana Monkey Business': ['std_Hat_Space Warrior Hair','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_Sombrero','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Eyes_Laser Eyes','std_Type_Solana'] - } - RARITY_EXCLUDE_COLS = { - # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature'] - 'Levana Dragon Eggs': ['meteor_id','attribute_count','collection_rank','transformed_collection_rank','rarity_score','rarity_score_rank','collection_rank_group'] - } - NUMERIC_COLS = { - 'Levana Dragon Eggs': ['collection_rank','temperature','transformed_collection_rank'] - } - ATT_EXCLUDE_COLS = { - 'Levana Dragon Eggs': ['attribute_count','transformed_collection_rank','collection_rank_group'] - } - collection = 'Solana Monkey Business' - # for collection in s_df.collection.unique(): - for collection in [ 'Solana Monkey Business' ]: - print('Working on collection {}'.format(collection)) - sales = s_df[ s_df.collection == collection ] - metadata = m_df[ m_df.collection == collection ] - metadata.groupby(['feature_name','feature_value']).token_id.count().reset_index().to_csv('~/Downloads/tmp.csv', index=False) - metadata[metadata.token_id == '1'] - metadata['feature_name'] = metadata.feature_name.apply(lambda x: x.strip() ) - metadata[metadata.token_id == '1'] - metadata[metadata.feature_name == 'rank'] - metadata.feature_name.unique() - metadata[(metadata.token_id=='1') & (metadata.collection == 'Solana Monkey Business')] + ########################### + # Calculate Floor # + ########################### + # coefsdf = pd.DataFrame() + # salesdf = pd.DataFrame() + # attributes = pd.DataFrame() + # pred_price = pd.DataFrame() + # feature_values = pd.DataFrame() + coefsdf = pd.read_csv('./data/coefsdf.csv') + salesdf = pd.read_csv('./data/model_sales.csv') + attributes = pd.read_csv('./data/attributes.csv') + pred_price = pd.read_csv('./data/pred_price.csv') + feature_values = pd.read_csv('./data/feature_values.csv') + # non-binary in model: collection_rank, temperature, weight + # non-binary in model; exclude from rarity: pct, rank, score + # exclude from model: lucky_number, shower + # exclude from model and rarity %: meteor_id, attribute_count, cracking_date + ALL_NUMERIC_COLS = ['rank','score','pct'] + ALL_NUMERIC_COLS = ['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'] + MODEL_EXCLUDE_COLS = { + # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature'] + 'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count','rarity_score_rank','rarity_score','weight'] + , 'Solana Monkey Business': ['Clothes_Diamond'] + } + MODEL_INCLUDE_COLS = { + # 'Solana Monkey Business': ['std_Hat_Strawhat','std_Hat_Space Warrior Hair','std_Clothes_Diamond','std_Eyes_Solana 
Vipers','std_Eyes_Vipers','std_Hat_Sombrero','std_Eyes_3D Glasses','std_Hat_Cowboy Hat','std_Eyes_Laser Eyes','std_matching_cop','std_matching_white','std_matching_black']
+        'Solana Monkey Business': ['std_Hat_Space Warrior Hair','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_Sombrero','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Eyes_Laser Eyes','std_Type_Solana']
+    }
+    RARITY_EXCLUDE_COLS = {
+        # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
+        'Levana Dragon Eggs': ['meteor_id','attribute_count','collection_rank','transformed_collection_rank','rarity_score','rarity_score_rank','collection_rank_group']
+    }
+    NUMERIC_COLS = {
+        'Levana Dragon Eggs': ['collection_rank','temperature','transformed_collection_rank']
+    }
+    ATT_EXCLUDE_COLS = {
+        'Levana Dragon Eggs': ['attribute_count','transformed_collection_rank','collection_rank_group']
+    }
+
+    # swap `collections` into the loop below to re-fit every collection that
+    # has both sales and metadata; the literal list re-fits a single collection
+    collections = list(s_df[['collection']].drop_duplicates().merge(m_df[['collection']].drop_duplicates()).collection.unique())
+    for collection in [ 'LunaBulls' ]:
+        # drop any previously saved rows for this collection before re-fitting;
+        # rebinding a loop variable (`for df in [...]: df = df[...]`) would not
+        # modify the underlying frames, so filter each frame explicitly
+        coefsdf = coefsdf[coefsdf.collection != collection] if 'collection' in coefsdf.columns else coefsdf
+        salesdf = salesdf[salesdf.collection != collection] if 'collection' in salesdf.columns else salesdf
+        attributes = attributes[attributes.collection != collection] if 'collection' in attributes.columns else attributes
+        pred_price = pred_price[pred_price.collection != collection] if 'collection' in pred_price.columns else pred_price
+        feature_values = feature_values[feature_values.collection != collection] if 'collection' in feature_values.columns else feature_values
+        print('Working on collection {}'.format(collection))
+        sales = s_df[ s_df.collection == collection ]
+        metadata = m_df[ m_df.collection == collection ]
+        metadata['feature_name'] = metadata.feature_name.apply(lambda x: x.strip() )
+
+        # categorize columns
+        all_names = sorted(metadata.feature_name.unique())
+        model_exclude = MODEL_EXCLUDE_COLS[collection] if collection in MODEL_EXCLUDE_COLS.keys() else []
+        num_features = sorted((NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []) + ALL_NUMERIC_COLS)
+        num_features = [ x for x in num_features if x in metadata.feature_name.unique() ]
+        num_metadata = metadata[metadata.feature_name.isin(num_features)]
+        cat_features = sorted([ x for x in all_names if not x in 
-    # create dummies for binary variables
-    num_metadata = num_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
-    num_metadata.columns = [ 'collection','token_id' ] + num_features
+        # categorize columns
+        all_names = sorted(metadata.feature_name.unique())
+        model_exclude = MODEL_EXCLUDE_COLS[collection] if collection in MODEL_EXCLUDE_COLS.keys() else []
+        num_features = sorted((NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []) + ALL_NUMERIC_COLS)
+        num_features = [ x for x in num_features if x in metadata.feature_name.unique() ]
+        num_metadata = metadata[metadata.feature_name.isin(num_features)]
+        num_metadata[num_metadata.feature_name == 'nft_rank']
+        cat_features = sorted([ x for x in all_names if not x in (model_exclude + num_features) ])
+        cat_metadata = metadata[metadata.feature_name.isin(cat_features)]
-    # create dummies for binary variables
-    cat_metadata = cat_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
-    cat_metadata.columns = [ 'collection','token_id' ] + cat_features
-    cat_metadata = calculate_percentages( cat_metadata, cat_features )
-    dummies = pd.get_dummies(cat_metadata[cat_features])
-    dummies.head(1).to_csv('~/Downloads/tmp2.csv', index=False)
-    if collection == 'Solana Monkey Business':
-        dummies['matching_cop'] = ((dummies['Clothes_Cop Vest'] == 1) & (dummies['Hat_Cop Hat'] == 1)).astype(int)
-        dummies['matching_white'] = ((dummies['Clothes_Beige Smoking'] == 1) & ((dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2']) == 1)).astype(int)
-        dummies['matching_black'] = ((dummies['Clothes_Black Smoking'] == 1) & ((dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_Black Top Hat']) == 1)).astype(int)
-        dummies['matching_top'] = ((dummies['matching_black'] == 1) | (dummies['matching_white']== 1)).astype(int)
-        # dummies['matching_green'] = ((dummies['Clothes_Green Smoking'] == 1) & ((dummies['Hat_Green Top Hat']) == 1)).astype(int)
-        # dummies['naked_1_att'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Clothes_None'] == 1)).astype(int)
-        # dummies['naked_1_att_hat'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Hat_None'] == 0)).astype(int)
-        dummies['fedora'] = (dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2'] + dummies['Hat_White Fedora 2'] >= 1 ).astype(int)
-        dummies['backwards_cap'] = (dummies['Hat_Black Backwards Cap'] + dummies['Hat_Blue Backwards Cap'] + dummies['Hat_Green Backwards Cap'] + dummies['Hat_Orange Backwards Cap'] + dummies['Hat_Purple Backwards Cap'] + dummies['Hat_Solana Backwards Cap'] >= 1 ).astype(int)
-        del dummies['matching_white']
-        del dummies['matching_black']
-    cat_metadata = pd.concat([ cat_metadata.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
-    del cat_metadata['pct']
+        # create dummies for binary variables
+        num_metadata = num_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
+        num_metadata.columns = [ 'collection','token_id' ] + num_features
-    for c in model_exclude:
-        if c in dummies.columns:
-            del dummies[c]
-    pred_cols = num_features + list(dummies.columns)
+        # create dummies for binary variables
+        cat_metadata = cat_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
+        cat_metadata.columns = [ 'collection','token_id' ] + cat_features
+        cat_metadata = calculate_percentages( cat_metadata, cat_features )
+        dummies = pd.get_dummies(cat_metadata[cat_features])
+        # dummies.head(1).to_csv('~/Downloads/tmp2.csv', index=False)
+        if collection == 'Solana Monkey Business':
+            dummies['matching_cop'] = ((dummies['Clothes_Cop Vest'] == 1) & (dummies['Hat_Cop Hat'] == 1)).astype(int)
+            dummies['matching_white'] = ((dummies['Clothes_Beige Smoking'] == 1) & ((dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2']) == 1)).astype(int)
+            dummies['matching_black'] = ((dummies['Clothes_Black Smoking'] == 1) & ((dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_Black Top Hat']) == 1)).astype(int)
+            dummies['matching_top'] = ((dummies['matching_black'] == 1) | (dummies['matching_white']== 1)).astype(int)
+            # dummies['matching_green'] = ((dummies['Clothes_Green Smoking'] == 1) & ((dummies['Hat_Green Top Hat']) == 1)).astype(int)
+            # dummies['naked_1_att'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Clothes_None'] == 1)).astype(int)
+            # dummies['naked_1_att_hat'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Hat_None'] == 0)).astype(int)
+            dummies['fedora'] = (dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2'] >= 1 ).astype(int)
+            dummies['backwards_cap'] = (dummies['Hat_Black Backwards Cap'] + dummies['Hat_Blue Backwards Cap'] + dummies['Hat_Green Backwards Cap'] + dummies['Hat_Orange Backwards Cap'] + dummies['Hat_Purple Backwards Cap'] + dummies['Hat_Solana Backwards Cap'] >= 1 ).astype(int)
+            del dummies['matching_white']
+            del dummies['matching_black']
+        cat_metadata = pd.concat([ cat_metadata.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
+        del cat_metadata['pct']
-    # create training df
-    df = merge(sales, num_metadata, ['collection','token_id'], ensure=False)
-    df = merge(df, cat_metadata, ['collection','token_id'])
-    df[df.adj_nft_rank_0 == 'None']
-    df[df.adj_nft_rank_0 == 'None'][['collection','token_id','nft_rank','adj_nft_rank_0']]
-    df.adj_nft_rank_0.unique()
-    for c in num_features:
-        df[c].unique()
-        df[df.nft_rank == 'None']
-        df[df[c] == 'None'][[ 'nft_rank' ]]
-        df[c] = df[c].apply(lambda x: just_float(x))
-    df.sort_values('price', ascending=0)[['price']].head(20)
-    # df.groupby(['rarity','weight']).price.mean()
+        for c in model_exclude:
+            if c in dummies.columns:
+                del dummies[c]
+        pred_cols = num_features + list(dummies.columns)
+        pred_cols = [ c for c in pred_cols if not c in model_exclude ]
-    # create target cols
-    target_col = 'adj_price'
-    df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 )
-    df = df[df[target_col].notnull()]
-    df['log_price'] = df[target_col].apply(lambda x: np.log(x) )
-    df['rel_price_0'] = df[target_col] - df.mn_20
-    df['rel_price_1'] = df[target_col] / df.mn_20
-    df = df[df.mn_20 > 0]
-    df['log_mn_20'] = np.log(df.mn_20)
-    print('Training on {} sales'.format(len(df)))
-    df = standardize_df(df, pred_cols)
-
-    std_pred_cols_0 = [ 'std_{}'.format(c) for c in pred_cols ]
-    std_pred_cols = [ 'std_{}'.format(c) for c in pred_cols ]
-
-    #########################
-    # Run the Model #
-    #########################
-    tmp = df[std_pred_cols].count().reset_index()
-    tmp.columns = ['a','b']
-    tmp.sort_values('b').head(20)
-    rem = list(tmp[tmp.b==0].a.values)
-    std_pred_cols = [ c for c in std_pred_cols if not c in rem ]
-    if collection == 'Levana Dragon Eggs':
-        std_pred_cols = [ 'std_essence_Dark','std_collection_rank_group_0','std_rarity_Legendary','std_rarity_Rare','std_rarity_Ancient','std_collection_rank','std_transformed_collection_rank' ]
-    mn = df.timestamp.min()
-    mx = df.timestamp.max()
-    df['wt'] = df.timestamp.apply(lambda x: 3.0 ** ((x - mn) / (mx - mn)) )
-    if collection == 'Levana Dragon Eggs':
-        df['wt'] = 1
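The sale weights just above (`wt = 3.0 ** ((x - mn) / (mx - mn))`) scale smoothly from 1 for the oldest sale to 3 for the newest, so recent sales count up to three times as much in the weighted regressions. A quick standalone illustration:

    import numpy as np

    timestamps = np.array([0.0, 0.25, 0.5, 0.75, 1.0])  # already scaled to [0, 1]
    weights = 3.0 ** timestamps
    print(weights)  # [1.    1.316 1.732 2.28  3.   ]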
Count_1','std_Hat_Protagonist Black Hat','std_Clothes_Sailor Vest','std_Mouth_Pipe','std_Hat_Protagonist White Hat','std_Clothes_Pirate Vest','std_Hat_Roman Helmet','std_Type_Solana','std_Clothes_Beige Smoking','std_Hat_Military Helmet','std_Hat_White Fedora 1','std_naked_1_att','std_Type_Zombie','std_Clothes_Roman Armor','std_Eyes_3D Glasses','std_Clothes_Orange Kimono','std_Hat_Green Punk Hair','std_Hat_Sombrero','std_Clothes_Military Vest','std_Hat_Space Warrior Hair','std_Hat_Blue Punk Hair','std_Clothes_Orange Jacket','std_Ears_Earing Silver','std_Eyes_Laser Eyes','std_Eyes_Vipers','std_Type_Alien','std_Type_Red','std_Hat_Admiral Hat' ] - cur_std_pred_cols = [ 'std_adj_nft_rank_0','std_Hat_Crown','std_adj_nft_rank_1','std_Type_Skeleton','std_Type_Alien','std_Clothes_None','std_Eyes_Vipers','std_Hat_Space Warrior Hair','std_Type_Zombie','std_Clothes_Pirate Vest','std_Clothes_Orange Kimono','std_Eyes_Laser Eyes','std_Type_Solana','std_Hat_Ninja Bandana','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Attribute Count_0','std_Attribute Count_1','std_Attribute Count_2','std_Attribute Count_3','std_Attribute Count_5','std_Hat_Strawhat','std_Hat_Admiral Hat','std_matching_top','std_Hat_Sombrero','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_None' ] - cur_std_pred_cols = deepcopy(std_pred_cols) - g = df[std_pred_cols].sum().reset_index() - g.columns = [ 'col','cnt' ] - g = g.sort_values('cnt') - g.head(20) - if collection == 'Solana Monkey Busines': - df.loc[ df.token_id == '903', 'nft_rank' ] = 18 - df[df.token_id=='903'] - df[df.token_id==903] - X = df[cur_std_pred_cols].values - y_0 = df.rel_price_0.values - y_1 = df.rel_price_1.values - # df['tmp'] = df.collection_rank.apply(lambda x: int((8888 - x)/1000) ) - # g = df.groupby('tmp').rel_price_0.mean().reset_index() - # g['g'] = g.tmp.apply(lambda x: (((1.42**(x**1.42)) - 1) / 20) + 0.13 ) - # g['g'] = g.tmp.apply(lambda x: 2**x ) - # g - - # run the linear model - # clf_lin = Lasso(alpha=1.0) if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)]) - - # clf_lin = Ridge(alpha=1000) - # clf_lin = Ridge(alpha=100) - # clf_lin.fit(X, y_0, df.wt.values) - # clf_las = Lasso(alpha=1.5) - # clf_las.fit(X, y_0, df.wt.values) - # clf_rfr = RandomForestRegressor() - # clf_rfr.fit(X, y_0) - # clf_rfr.feature_importances_ - # imp = [] - # for a, b, c, d in zip(cur_std_pred_cols, clf_rfr.feature_importances_, clf_lin.coef_, clf_las.coef_): - # imp += [[a, b, abs(c), abs(d)]] - # imp = pd.DataFrame(imp, columns=['col','imp','lin','las']).sort_values('imp', ascending=0) - # imp['imp_rk'] = imp.imp.rank(ascending=0) - # imp['lin_rk'] = imp.lin.rank(ascending=0) - # imp['las_rk'] = imp.las.rank(ascending=0) - # imp['include'] = 0 - # imp.to_csv('~/Downloads/coef.csv', index=False) - # imp.head(50).tail(20) - # imp.head(40).tail(10) - # imp.head(50).tail(10) - # nft_rank should be negative - # adj_nft_rank_0 should be positive - # adj_nft_rank_1 should be positive - clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) - clf_lin = Ridge(alpha=30, fit_intercept=True) - clf_lin = Lasso(alpha=.225) - def get_coefs(cols, coef): - coefs = [] - for a, b in zip(cols, coef): - coefs += [[a,b]] - coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0) - # coefs.to_csv('~/Downloads/{}_lin_coefs.csv'.format(collection), index=False) - coefs['tmp'] = coefs.col.apply(lambda x: 'nft_rank' in x ) - # coefs['mult'] = coefs.col.apply(lambda x: -1 if x == 'std_nft_rank' else 1 ) - 
coefs['mult'] = coefs.apply(lambda x: -1 if x['col'] == 'std_nft_rank' else 1 if x['coef'] >= 0 else -1 , 1 ) - coefs['val'] = coefs.mult * coefs.coef - coefs = coefs.sort_values('val', ascending=0) - return(coefs) - - mn = -1 - print('Starting with {} cols'.format(len(cur_std_pred_cols))) - while mn < 0 or len(cur_std_pred_cols) > 140: - X = df[cur_std_pred_cols].values - clf_lin.fit(X, y_0, df.wt.values) - coefs = get_coefs(cur_std_pred_cols, clf_lin.coef_) - tmp = coefs[coefs.tmp == True] - mn = min(coefs.val) if len(coefs) else 0 - if mn < 0: - cur_std_pred_cols.remove(coefs.col.values[-1]) - else: - cur_std_pred_cols.remove(coefs.col.values[-1]) - coefs.to_csv('~/Downloads/{}_lin_coefs.csv'.format(collection), index=False) - len(coefs[coefs.coef !=0]) - # print(coefs[coefs.coef !=0]) - # print(len(coefs[coefs.coef !=0])) - INCLUDE_COLS = MODEL_INCLUDE_COLS[collection] if collection in MODEL_INCLUDE_COLS.keys() else [] - - # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) - - cur_std_pred_cols = list(coefs[coefs.coef !=0].col.unique()) - for c in INCLUDE_COLS: - if not c in cur_std_pred_cols: - cur_std_pred_cols.append(c) - lin_std_pred_cols = cur_std_pred_cols - X = df[cur_std_pred_cols].values - # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) - # clf_lin = Lasso(alpha=0.1) - clf_lin = Lasso(alpha=.1) - clf_lin.fit(X, y_0, df.wt.values) - coefs = get_coefs(cur_std_pred_cols, clf_lin.coef_) - print(coefs[coefs.coef !=0]) - print(len(coefs[coefs.coef !=0])) - print(coefs[coefs.col.isin(INCLUDE_COLS)]) - coefs[coefs.coef !=0].to_csv('./data/coefs/{}_lin_coefs.csv'.format(collection), index=False) - df[df['std_Attribute Count_0']!=0] - df['std_Attribute Count_0'].unique() - coefs[coefs.col.isin(INCLUDE_COLS)] - df['pred'] = clf_lin.predict(X) - df['err'] = df.pred - df.rel_price_0 - df[df['std_Hat_Space Warrior Hair'] == 1][['pred',target_col]].mean() - df[df['std_Hat_Space Warrior Hair'] == 1].err.median() - tmp = [] - for c in std_pred_cols: - if len(df[df[c] == 1]): - mu = round(df[df[c] == 1].err.mean()) - md = round(df[df[c] == 1].err.median()) - n = len(df[df[c] == 1]) - tmp += [[ c, int(c in cur_std_pred_cols ), n, mu, md ]] - # print('{}: {}, {}, {}'.format(c, mu, md, n)) - tmp = pd.DataFrame(tmp, columns=['c','i','n','mu','md']).sort_values('mu') - tmp.to_csv('~/Downloads/tmp4.csv', index=False) - tmp[tmp.i == 0].head(8) - tmp[tmp.i == 0].tail(8) - 'std_Hat_Crown','std_Attribute Count_0','std_Hat_Space Warrior Hair','std_Eyes_Laser Eyes','std_Type_Solana','' - df[df['std_Hat_Space Warrior Hair'] == 1].err.mean() - df[df['std_Hat_Strawhat'] == 1][['pred','rel_price_0']].mean() - - df['pred_lin'] = clf_lin.predict(X) - df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20 - df['err_lin'] = abs(((df.pred_lin - df[target_col]) / df[target_col]) ) - # df[df.genus_Titan==1][['rarity']] - # df[(df.rarity=='Legendary') | (df.genus=='Titan')][['genus','rarity']] - - # run the log model - # clf_log = Lasso(1.0) if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)]) - clf_log = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) - clf_log = Ridge(alpha=30) - clf_log = Lasso(0.003) - # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) - - mn = -1 - cur_std_pred_cols = deepcopy(std_pred_cols) - while mn < 0 or len(cur_std_pred_cols) > 140: - X = df[cur_std_pred_cols].values - clf_log.fit(X, y_1, df.wt.values) - coefs = get_coefs(cur_std_pred_cols, clf_log.coef_) - tmp = coefs[coefs.tmp == True] - mn = min(tmp.coef) if 
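Both the 'lin' pass above and the 'log' pass below rely on the same backward elimination: fit, rank coefficients by the sign-adjusted value from `get_coefs` (rank columns are expected to hurt price, trait dummies to help), and drop the worst column until nothing is wrong-signed. A simplified sketch of that loop, assuming `X_df` is a pandas DataFrame of standardized features:

    import numpy as np
    from sklearn.linear_model import Ridge

    def prune_wrong_signed(X_df, y, cols, must_be_negative=('std_nft_rank',)):
        """Refit and drop the most wrong-signed column until all signs look right."""
        clf = Ridge(alpha=1.0)
        cols = list(cols)
        while cols:
            clf.fit(X_df[cols].values, y)
            # sign-adjusted value: flip columns expected to be negative
            vals = [(-c if col in must_be_negative else c)
                    for col, c in zip(cols, clf.coef_)]
            worst = int(np.argmin(vals))
            if vals[worst] >= 0:
                break  # every coefficient has the expected sign
            cols.pop(worst)
        return cols, clf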
-    # run the log model
-    # clf_log = Lasso(1.0) if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)])
-    clf_log = RidgeCV(alphas=[1.5**x for x in range(1, 20)])
-    clf_log = Ridge(alpha=30)
-    clf_log = Lasso(0.003)
-    # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)])
-
-    mn = -1
-    cur_std_pred_cols = deepcopy(std_pred_cols)
-    while mn < 0 or len(cur_std_pred_cols) > 140:
-        X = df[cur_std_pred_cols].values
-        clf_log.fit(X, y_1, df.wt.values)
-        coefs = get_coefs(cur_std_pred_cols, clf_log.coef_)
-        tmp = coefs[coefs.tmp == True]
-        mn = min(tmp.coef) if len(tmp) else 0
-        if mn < 0:
-            cur_std_pred_cols.remove(tmp.col.values[-1])
-        else:
-            cur_std_pred_cols.remove(coefs.col.values[-1])
-    coefs = get_coefs(cur_std_pred_cols, clf_log.coef_)
-    coefs[coefs.coef !=0].to_csv('./data/coefs/{}_log_coefs.csv'.format(collection), index=False)
-    # print(coefs[coefs.coef !=0])
-    len(coefs[coefs.coef !=0])
-    # cur_std_pred_cols = list(coefs[coefs.coef !=0].col.unique())
-    for c in INCLUDE_COLS:
-        if not c in cur_std_pred_cols:
-            cur_std_pred_cols.append(c)
-    log_std_pred_cols = cur_std_pred_cols
-    X = df[cur_std_pred_cols].values
-    clf_log = Lasso(0.001)
-    clf_log.fit(X, y_1, df.wt.values)
-    coefs = get_coefs(cur_std_pred_cols, clf_log.coef_)
-    print(coefs[coefs.coef !=0])
-    print(len(coefs[coefs.coef !=0]))
-    print(coefs[coefs.col.isin(INCLUDE_COLS)])
-    # clf_log.fit(X, y_1, df.wt.values)
-    # if collection == 'Levana Dragon Eggs':
-    #     coefs = []
-    #     for a, b in zip(std_pred_cols, clf_lin.coef_):
-    #         coefs += [[a,b]]
-    #     coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
-    #     coefs.to_csv('~/Downloads/levana_log_coefs.csv', index=False)
-    df['pred_log'] = clf_log.predict(X)
-    df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
-    df['err_log'] = abs(((df.pred_log - df[target_col]) / df[target_col]) )
-    df[[ target_col,'pred_log','err_log','mn_20' ]].sort_values('err_log').tail(50)
-    df['err'] = df.err_lin * df.err_log
+        # create training df
+        sales['token_id'] = sales.token_id.astype(str)
+        num_metadata['token_id'] = num_metadata.token_id.astype(str)
+        df = merge(sales, num_metadata, ['collection','token_id'], ensure=False)
+        df = merge(df, cat_metadata, ['collection','token_id'], ensure=False)
+        for c in num_features:
+            df[c] = df[c].apply(lambda x: just_float(x))
-    # combine the models
-    clf = LinearRegression(fit_intercept=False)
-    clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values )
-    df[['pred_lin','pred_log',target_col]].mean()
-    print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) ))
-    l = df.sort_values('block_timestamp', ascending=0).mn_20.values[0]
-    tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], l]], columns=['collection','lin_coef','log_coef','floor_price'])
-    if clf.coef_[0] < 0:
-        print('Only using log')
-        df['pred'] = df.pred_log
-        tmp['lin_coef'] = 0
-        tmp['log_coef'] = 1
-    elif clf.coef_[1] < 0:
-        print('Only using lin')
-        df['pred'] = df.pred_lin
-        tmp['lin_coef'] = 1
-        tmp['log_coef'] = 0
-    else:
-        print('Only using BOTH!')
-        df['pred'] = clf.predict( df[['pred_lin','pred_log']].values )
-    coefsdf = coefsdf.append(tmp)
-    df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) )
+        #################################
+        # Create Test DataFrame #
+        #################################
+        # test = merge(num_metadata, cat_metadata, ['collection','token_id'], ensure=True, how='left')
+        ensure = not collection in ['Aurory']
+        # test = merge(num_metadata, cat_metadata, ['collection','token_id'], ensure=ensure)
+        test = merge(num_metadata, cat_metadata, ['collection','token_id'], ensure=False)
+        for c in num_features:
+            test[c] = test[c].apply(lambda x: just_float(x) )
+        tail = df.sort_values('timestamp').tail(1)
+        test.loc[ test.token_id == '903', 'nft_rank' ] = 18
+        for c in [ 'std_timestamp','mn_20','log_mn_20' ]:
+            if c in tail.columns:
+                test[c] = tail[c].values[0]
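The two training targets used throughout are both floor-relative: `rel_price_0 = adj_price - mn_20` (a premium in SOL over the rolling floor, fed to the 'lin' model) and `rel_price_1 = adj_price / mn_20` (a floor multiple, fed to the 'log' model). Predictions are mapped back to prices later by adding or multiplying the current floor. A tiny numeric check:

    price, floor = 150.0, 100.0
    rel_0 = price - floor      # 50.0 SOL premium over the floor
    rel_1 = price / floor      # 1.5x the floor
    # recovering the price from each representation
    assert floor + rel_0 == price
    assert floor * rel_1 == price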
-    # print out some summary stats
-    df['err'] = df[target_col] - df.pred
-    df['q'] = (df.pred.rank() ** 1.5 * .2) / len(df)
-    df['q'] = df.q.apply(lambda x: int(round(x)) )
-    df['pct_err'] = (df[target_col] / df.pred) - 1
-    pe_mu = df.pct_err.mean()
-    pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) ].pct_err.std()
-    pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) & (df.days_ago<=50) ].pct_err.std()
-    df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) )
-    df['pred_sd'] = df.pred * pe_sd
-    # print(df.groupby('q')[['err','pred',target_col]].mean())
-    print(df[df.wt >= df.wt.median()].groupby('q')[['err','pred',target_col]].mean())
-    print(df.groupby('q')[['err','pred',target_col]].mean())
-    # df.err.mean()
-    # df[df.weight >= 3.5].err.mean()
-    df[df.pred < 200].err.mean()
-    df['collection'] = collection
-    print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2)))
-    salesdf = salesdf.append( df.merge(s_df[s_df.sim == 0][['collection','token_id','block_timestamp','price']] )[[ 'collection','token_id','block_timestamp','price','pred','mn_20','nft_rank' ]].sort_values('block_timestamp', ascending=0) )
+        for tmp in [df, test]:
+            for i in [100, 250, 1000]:
+                tmp['is_top_{}'.format(i)] = (tmp.nft_rank <= i).astype(int)
+        pred_cols += [ 'is_top_100','is_top_250','is_top_1000' ]
+        df.sort_values('price', ascending=0)[['price']].head(20)
+        # df.groupby(['rarity','weight']).price.mean()
+
+        # create target cols
+        target_col = 'adj_price'
+        df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 )
+        df['mn_20'] = df.mn_20 * 1.01
+        df = df[df[target_col].notnull()]
+        df['log_price'] = df[target_col].apply(lambda x: np.log(x) )
+        df['rel_price_0'] = df[target_col] - df.mn_20
+        df['rel_price_1'] = df[target_col] / df.mn_20
+        df = df[df.mn_20 > 0]
+        df['log_mn_20'] = np.log(df.mn_20)
+        print('Training on {} sales'.format(len(df)))
+        df = standardize_df(df, pred_cols)
+        test = standardize_df(test, pred_cols, df)
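Note that `standardize_df(test, pred_cols, df)` passes the training frame as `usedf`, so the test features are z-scored with the training mean/sd rather than their own (binary 0/1 dummies pass through unchanged; see the helper's definition in solana_model_old.py further below). Roughly, for the non-binary branch:

    def standardize_like(test, train, cols):
        # assumption: mirrors standardize_df's behavior for non-binary columns
        for c in cols:
            mu, sd = train[c].mean(), train[c].std()
            test['std_{}'.format(c)] = (test[c] - mu) / sd
        return test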
+
+        std_pred_cols_0 = [ 'std_{}'.format(c) for c in pred_cols ]
+        std_pred_cols = [ 'std_{}'.format(c) for c in pred_cols ]
+
+        #########################
+        # Run the Model #
+        #########################
+        tmp = df[std_pred_cols].count().reset_index()
+        tmp.columns = ['a','b']
+        tmp.sort_values('b').head(20)
+        rem = list(tmp[tmp.b==0].a.values)
+        std_pred_cols = [ c for c in std_pred_cols if not c in rem ]
+        if collection == 'Levana Dragon Eggs':
+            std_pred_cols = [ 'std_essence_Dark','std_collection_rank_group_0','std_rarity_Legendary','std_rarity_Rare','std_rarity_Ancient','std_collection_rank','std_transformed_collection_rank' ]
+        mn = df.timestamp.min()
+        mx = df.timestamp.max()
+        df['wt'] = df.timestamp.apply(lambda x: 3.0 ** ((x - mn) / (mx - mn)) )
+        df.loc[ (df.collection == 'Aurory') & (df.block_timestamp <= '2021-09-05'), 'wt' ] = 0.05
+        if collection == 'Levana Dragon Eggs':
+            df['wt'] = 1
+        # df['wt'] = df.price.apply(lambda x: 1.0 / (x ** 0.9) )
+        # df.sort_values('price', ascending=0)[['price','wt']].head(20)
+        # std_pred_cols = [ 'std_Hat_Crown','std_adj_nft_rank_0','std_Hat_None','std_Eyes_None','std_Clothes_None','std_Attribute Count_4','std_Mouth_None','std_adj_nft_rank_1','std_Type_Dark','std_Ears_None','std_Background_Light purple','std_Hat_Black Fedora 2','std_Hat_White Fedora 2','std_Attribute Count_0','std_Type_Skeleton','std_Attribute Count_2','std_Attribute Count_1','std_Hat_Protagonist Black Hat','std_Clothes_Sailor Vest','std_Mouth_Pipe','std_Hat_Protagonist White Hat','std_Clothes_Pirate Vest','std_Hat_Roman Helmet','std_Type_Solana','std_Clothes_Beige Smoking','std_Hat_Military Helmet','std_Hat_White Fedora 1','std_naked_1_att','std_Type_Zombie','std_Clothes_Roman Armor','std_Eyes_3D Glasses','std_Clothes_Orange Kimono','std_Hat_Green Punk Hair','std_Hat_Sombrero','std_Clothes_Military Vest','std_Hat_Space Warrior Hair','std_Hat_Blue Punk Hair','std_Clothes_Orange Jacket','std_Ears_Earing Silver','std_Eyes_Laser Eyes','std_Eyes_Vipers','std_Type_Alien','std_Type_Red','std_Hat_Admiral Hat' ]
+        cur_std_pred_cols = [ 'std_adj_nft_rank_0','std_Hat_Crown','std_adj_nft_rank_1','std_Type_Skeleton','std_Type_Alien','std_Clothes_None','std_Eyes_Vipers','std_Hat_Space Warrior Hair','std_Type_Zombie','std_Clothes_Pirate Vest','std_Clothes_Orange Kimono','std_Eyes_Laser Eyes','std_Type_Solana','std_Hat_Ninja Bandana','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Attribute Count_0','std_Attribute Count_1','std_Attribute Count_2','std_Attribute Count_3','std_Attribute Count_5','std_Hat_Strawhat','std_Hat_Admiral Hat','std_matching_top','std_Hat_Sombrero','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_None' ]
+        cur_std_pred_cols = deepcopy(std_pred_cols)
+        g = df[std_pred_cols].sum().reset_index()
+        g.columns = [ 'col','cnt' ]
+        g = g.sort_values('cnt')
+        g.head(20)
+        if collection == 'Solana Monkey Business':
+            df.loc[ df.token_id == '903', 'nft_rank' ] = 18
+        df[df.token_id=='903']
+        df[df.token_id==903]
+        df = df.reset_index(drop=True)
+        X = df[cur_std_pred_cols].values
+        y_0 = df.rel_price_0.values
+        y_1 = df.rel_price_1.values
+
+        # CUR_FLOOR = df.sort_values('block_timestamp', ascending=0).mn_20.values[0]
+        CUR_FLOOR = listings[(listings.collection == collection) & (listings.price.notnull())].price.min()
+        print('CUR_FLOOR = {}'.format(CUR_FLOOR))
+
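The loop that follows is a small stacking setup: for each floor-relative target, several base models ('las', 'ridge', 'rfr', fit via the project-specific `ku.get_bst_params` / `ku.apply_model` helpers, which are not shown in this diff) each produce a `y_pred_<model>_<it>` column, and a final `LinearRegression` blends those columns, dropping any base model that receives a negative blend weight. A schematic of the idea using plain sklearn in place of the `ku.*` helpers (in-sample blending, as in the script):

    import numpy as np
    from sklearn.linear_model import Lasso, Ridge, LinearRegression
    from sklearn.ensemble import RandomForestRegressor

    def stack_predictions(X, y):
        base = {'las': Lasso(alpha=0.1), 'ridge': Ridge(alpha=1.0),
                'rfr': RandomForestRegressor(n_estimators=100)}
        preds = []
        for name, clf in base.items():
            clf.fit(X, y)
            preds.append(clf.predict(X))    # one y_pred_<model> column each
        P = np.column_stack(preds)
        blender = LinearRegression().fit(P, y)  # blend weights over base models
        return blender.predict(P)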
+        for target_col in [ 'rel_price_0', 'rel_price_1' ]:
+            it = target_col[-1]
+            y_val = df[target_col].values
+            print('target_col = {}'.format(target_col))
+            mn = -1
+            cols = [ 'std_nft_rank','std_adj_nft_rank_0','std_adj_nft_rank_1','std_adj_nft_rank_2' ]
+            clf = Ridge(alpha = 1)
+            # while mn < 0 and len(cols):
+            #     clf.fit(df[cols].values, y_val, df.wt.values)
+            #     coefs = get_coefs(cols, clf.coef_)
+            #     mn = min(coefs.val) if len(coefs) else 0
+            #     if mn < 0:
+            #         cols.remove(coefs.col.values[-1])
+
+            col = 'rarity_value_'+it
+            model = 'ridge'
+            df[col] = 0
+            test[col] = 0
+            # df, bst_p, bst_r = ku.get_bst_params( model, df, df[cols].values, y_val, target_col, col, verbose = True, wt_col='wt' )
+            # test = ku.apply_model( model, bst_p, df, test, cols, target_col, col)
+
+            # df['rarity_value_'+it] = clf.predict(df[cols].values)
+            rar_adj_target_col = 'rar_adj_'+target_col
+            df[rar_adj_target_col] = df[target_col] - df['rarity_value_'+it]
+            # test[rar_adj_target_col] = test[target_col] - test['rarity_value_'+it]
+            y_val_rar_adj = df[rar_adj_target_col].values
+            models = ['las','ridge'] if target_col == 'rel_price_1' else ['las','ridge','rfr']
+            for model in models:
+                cur_std_pred_cols = std_pred_cols
+                print(model)
+                y = y_val_rar_adj if model in ['rfr'] else y_val
+                col = 'y_pred_{}_{}'.format(model, it)
+                df, bst_p, bst_r = ku.get_bst_params( model, df, X, y, target_col, col, verbose = True, wt_col='wt' )
+
+                # if model == 'ridge':
+                #     while len(cur_std_pred_cols) > 50:
+                #         coefs = get_coefs(cur_std_pred_cols, clf.coef_)
+                #         cur_std_pred_cols.remove(coefs.col.values[-1])
+                #         new_X = df[cur_std_pred_cols].values
+                #         clf = ku.get_model(model, bst_p)
+                #         clf.fit(new_X, y)
+                #     # coefs.to_csv('./data/coefs/{}_{}_{}.csv'.format(collection, model, it))
+                #     new_X = df[cur_std_pred_cols].values
+                #     df, bst_p, bst_r = ku.get_bst_params( model, df, new_X, y, target_col, col, verbose = True, wt_col='wt' )
+
+                if model in ['las','ridge']:
+                    clf = ku.get_model(model, bst_p)
+                    clf.fit(X, y)
+                    coefs = get_coefs(cur_std_pred_cols, clf.coef_)
+                    mn = coefs.val.min()
+                    while mn < 0:
+                        cur_std_pred_cols = [ c for c in coefs[coefs.val >= 0 ].col.unique() ]
+                        X_new = df[cur_std_pred_cols].values
+                        clf.fit(X_new, y)
+                        # df, bst_p, bst_r = ku.get_bst_params( model, df, df[cur_std_pred_cols].values, y, target_col, col, verbose = True, wt_col='wt' )
+                        coefs = get_coefs(cur_std_pred_cols, clf.coef_)
+                        mn = coefs.val.min()
+                    coefs.to_csv('./data/coefs/{}_{}_{}.csv'.format(collection, model, it), index=False)
+                    test = ku.apply_model( model, bst_p, df, test, cur_std_pred_cols, target_col, col)
+                if model in ['rfr']:
+                    df[col] = df[col] + df['rarity_value_'+it]
+                    test[col] = test[col] + test['rarity_value_'+it]
+
+            mn = -1
+            cols = [ c for c in df.columns if c[:7] == 'y_pred_' and c[-1] == it ]
+            clf = LinearRegression()
+            df[cols].mean()
+            df[cols].median()
+            test[cols].mean()
+            test[cols].median()
+            while mn < 0 and len(cols):
+                clf.fit(df[cols].values, df[target_col].values)
+                coefs = get_coefs(cols, clf.coef_)
+                mn = min(coefs.val) if len(coefs) else 0
+                if mn < 0:
+                    cols.remove(coefs.col.values[-1])
+                else:
+                    print(coefs)
+            if it == '0':
+                df['pred_lin'] = clf.predict(df[cols].values) + df.mn_20
+                test['pred_lin'] = clf.predict(test[cols].values) + CUR_FLOOR
+                # df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20
+            else:
+                df['pred_log'] = clf.predict(df[cols].values)
+                df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
+                test['pred_log'] = clf.predict(test[cols].values)
+                test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * CUR_FLOOR
+
+        clf = LinearRegression(fit_intercept=False)
+        target_col = 'adj_price'
+        clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values )
+        clf.score( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values )
+        df[['pred_lin','pred_log',target_col]].mean()
+        df[['pred_lin','pred_log',target_col]].median()
+        test[['pred_lin','pred_log']].mean()
+        test[['pred_lin','pred_log']].median()
+
+        print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) ))
+        tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], CUR_FLOOR]], columns=['collection','lin_coef','log_coef','floor_price'])
+        if clf.coef_[0] < 0:
+            print('Only using log')
+            df['pred'] = df.pred_log
+            test['pred'] = test.pred_log
+            tmp['lin_coef'] = 0
+            tmp['log_coef'] = 1
+        elif clf.coef_[1] < 0:
+            print('Only using lin')
+            df['pred'] = df.pred_lin
+            test['pred'] = test.pred_lin
+            tmp['lin_coef'] = 1
+            tmp['log_coef'] = 0
+        else:
+            print('Only using BOTH!')
+            df['pred'] = clf.predict( df[['pred_lin','pred_log']].values )
+            test['pred'] = clf.predict( test[['pred_lin','pred_log']].values )
+        coefsdf = coefsdf.append(tmp)
+        df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) )
+
+        # print out some summary stats
+        df['err'] = df[target_col] - df.pred
+        df['q'] = (df.pred.rank() ** 1.5 * .2) / len(df)
+        df['q'] = df.q.apply(lambda x: int(round(x)) )
+        df['pct_err'] = (df[target_col] / df.pred) - 1
+        pe_mu = df.pct_err.mean()
+        pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) & (df.days_ago<=50) ].pct_err.std()
+        if pe_sd != pe_sd:  # NaN check: no recent in-range sales, so fall back to the unfiltered std
+            pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) ].pct_err.std()
+        df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) )
+        df['pred_sd'] = df.pred * pe_sd
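`pe_sd != pe_sd` is the idiomatic NaN test; the resulting `pred_sd` is a per-token uncertainty band proportional to the prediction. A standalone illustration of the same computation:

    import numpy as np

    pct_err = np.array([0.1, -0.2, 0.05, 2.4])        # (price / pred) - 1
    recent  = np.array([True, True, False, True])
    inlier  = (pct_err > -0.9) & (pct_err < 0.9)
    pe_sd = pct_err[inlier & recent].std()
    if pe_sd != pe_sd:        # True only for NaN, i.e. no recent in-range sales
        pe_sd = pct_err[inlier].std()
    pred = 120.0
    pred_sd = pred * pe_sd    # uncertainty band scales with the prediction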
+        # print(df.groupby('q')[['err','pred',target_col]].mean())
+        print(df[df.wt >= df.wt.median()].groupby('q')[['err','pred',target_col]].mean())
+        print(df.groupby('q')[['err','pred',target_col]].mean())
+        # df.err.mean()
+        # df[df.weight >= 3.5].err.mean()
+        df[df.pred < 200].err.mean()
+        df['collection'] = collection
+        print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2)))
+        salesdf = salesdf.append( df.merge(s_df[s_df.sim == 0][['collection','token_id','block_timestamp','price']] )[[ 'collection','token_id','block_timestamp','price','pred','mn_20','nft_rank' ]].sort_values('block_timestamp', ascending=0) )
+
+        ############################################################
+        # Create Predictions for Each NFT in The Collection #
+        ############################################################
+        # test = merge(num_metadata, cat_metadata, ['collection','token_id'])
+        # for c in num_features:
+        #     test[c] = test[c].apply(lambda x: just_float(x) )
+        # tail = df.sort_values('timestamp').tail(1)
+        # test.loc[ test.token_id == '903', 'nft_rank' ] = 18
+        # for c in [ 'std_timestamp','mn_20','log_mn_20' ]:
+        #     if c in tail.columns:
+        #         test[c] = tail[c].values[0]
+        # test = standardize_df(test, pred_cols, df)
+
+        # test['pred_lin'] = clf_lin.predict(test[lin_std_pred_cols].values)
+        # test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l)
+        # test['pred_log'] = clf_log.predict(test[log_std_pred_cols].values)
+        # test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l
+
+        # test['pred_price'] = test.pred.apply(lambda x: x if x < 400 else (x-400)**0.96 + 400 )
+        def f(p):
+            c = CUR_FLOOR * 2.5
+            return( p if p <= c else c+((p-c) ** 0.95) )
+        test['pred_price'] = test.pred.apply(lambda x: f(x) )
+        len(test[test.pred <= CUR_FLOOR * 1.01])
+        len(test[test.pred <= CUR_FLOOR * 1.02])
+        if not check_exclude:
+            test['pred_price'] = test.pred_price.apply(lambda x: (x*0.985) )
+        len(test[test.pred_price <= CUR_FLOOR])
+        test['pred_sd'] = test.pred_price * pe_sd
+        test = test.sort_values(['collection','token_id'])
+        test['rk'] = test.pred_price.rank(ascending=0, method='first')
+        test['collection'] = collection
+        pred_price = pred_price.append( test[[ 'collection','token_id','nft_rank','rk','pred_price','pred_sd' ]].sort_values('pred_price') ).drop_duplicates(subset=['collection','token_id'], keep='last')
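`f` above soft-caps predictions: below 2.5x the current floor they pass through unchanged; above that, the excess is shrunk with a 0.95 power so the rarest tokens are not priced absurdly. For example, with a floor of 100:

    CUR_FLOOR = 100.0

    def f(p):
        c = CUR_FLOOR * 2.5
        return p if p <= c else c + ((p - c) ** 0.95)

    print(f(200))    # 200.0 (below the 250 cap, unchanged)
    print(f(1250))   # ~957.9 (the 1000 excess shrinks to 1000**0.95 ~= 707.9)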
-    ############################################################
-    # Create Predictions for Each NFT in The Collection #
-    ############################################################
-    test = merge(num_metadata, cat_metadata, ['collection','token_id'])
-    for c in num_features:
-        test[c] = test[c].apply(lambda x: just_float(x) )
-    tail = df.sort_values('timestamp').tail(1)
-    test.loc[ test.token_id == '903', 'nft_rank' ] = 18
-    test[test.token_id=='903']
-    for c in [ 'std_timestamp','mn_20','log_mn_20' ]:
-        if c in tail.columns:
-            test[c] = tail[c].values[0]
-    test = standardize_df(test, pred_cols, df)
+        imp = []
+        for c in dummies.columns:
+            md = test[test[c] == 1].pred_price.median()
+            md_0 = test.pred_price.quantile(0.475)
+            imp += [[ collection, c, md_0, md ]]
+        # imp = pd.DataFrame(imp, columns=['collection','feature_name',''])
+        imp = pd.DataFrame(imp, columns=['collection','col','col_md','md']).sort_values('md', ascending=0)
+        imp['pct_vs_baseline'] = ((imp.md / imp.col_md) - 1).apply(lambda x: max(0, x))
+        imp['feature_name'] = imp.col.apply(lambda x: re.split('_', x)[0] )
+        imp['feature_value'] = imp.col.apply(lambda x: re.split('_', x)[1] if '_' in x else None )
+        feature_values = feature_values.append(imp[['collection','feature_name','feature_value','pct_vs_baseline']])
-    test['pred_lin'] = clf_lin.predict(test[lin_std_pred_cols].values)
-    test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l)
-    test['pred_log'] = clf_log.predict(test[log_std_pred_cols].values)
-    test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l
+        cols = metadata.feature_name.unique()
+        cols = [ x for x in cols if not x in (ATT_EXCLUDE_COLS[collection] if collection in ATT_EXCLUDE_COLS.keys() else []) + ALL_NUMERIC_COLS ]
+        exclude = RARITY_EXCLUDE_COLS[collection] if collection in RARITY_EXCLUDE_COLS.keys() else []
+        for c in cols:
+            cur = metadata[metadata.feature_name == c][['collection','token_id','feature_name','feature_value']]
+            l = len(cur.token_id.unique())
+            if c in exclude:
+                cur['rarity'] = None
+            else:
+                g = cur.groupby('feature_value').token_id.count().reset_index()
+                g['rarity'] = g.token_id / l
+                cur = merge(cur, g[['feature_value','rarity']])
+            attributes = attributes.append(cur)
+
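Rarity here is simply the share of tokens carrying each trait value (lower means rarer); features on the exclude list get a null rarity. A compact equivalent of the loop above:

    import pandas as pd

    meta = pd.DataFrame({
        'token_id': ['1', '2', '3', '4'],
        'feature_name': 'Hat',
        'feature_value': ['Crown', 'Sombrero', 'Sombrero', 'Sombrero'],
    })
    counts = meta.groupby('feature_value').token_id.count()
    rarity = counts / meta.token_id.nunique()
    print(rarity)  # Crown 0.25, Sombrero 0.75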
+        attributes['feature_name'] = attributes.feature_name.apply(lambda x: re.sub('_', ' ', x).title().strip() )
+        attributes['feature_value'] = attributes.feature_value.apply(lambda x: str(x).strip() )
+        sorted(attributes['feature_name'].unique())
+        if len(feature_values):
+            feature_values['feature_name'] = feature_values.feature_name.apply(lambda x: re.sub('_', ' ', x).title() )
+        # feature_values = pd.read_csv('./data/feature_values.csv')
+        feature_values = feature_values.merge(attributes[['collection','feature_name']].drop_duplicates())
+        # n = feature_values[['collection', 'feature_name']].drop_duplicates().groupby(['collection']).feature_name.count().reset_index().rename(columns={'feature_name': 'n'})
+        # feature_values = feature_values.merge(n)
+        # feature_values['pct_vs_baseline'] = feature_values.pct_vs_baseline / feature_values.n
+        # del feature_values['n']
+        feature_values[ (feature_values.collection == 'Solana Monkey Business') & (feature_values.feature_name == 'Clothes') ]
+        feature_values[ (feature_values.collection == 'Solana Monkey Business') & (feature_values.feature_name == 'Clothes') & (feature_values.feature_value == 'Poncho') ]
+        attributes[ (attributes.collection == 'Solana Monkey Business') & (attributes.feature_name == 'Clothes') & (attributes.feature_value == 'Poncho') & (attributes.token_id == '1') ]
+        attributes[ (attributes.collection == 'Solana Monkey Business') & (attributes.feature_name == 'Clothes') & (attributes.feature_value == 'Poncho') & (attributes.token_id == 1) ]
-    test['pred_price'] = clf.predict( test[[ 'pred_lin','pred_log' ]].values )
-    if not check_exclude:
-        test['pred_price'] = test.pred_price.apply(lambda x: (x*0.985) )
-    test['pred_sd'] = test.pred_price * pe_sd
-    test = test.sort_values(['collection','token_id'])
-    test['rk'] = test.pred_price.rank(ascending=0, method='first')
-    test['collection'] = collection
-    pred_price = pred_price.append( test[[ 'collection','token_id','nft_rank','rk','pred_price','pred_sd' ]].sort_values('pred_price') )
+    coefsdf.to_csv('./data/coefsdf.csv', index=False)
+    salesdf.to_csv('./data/model_sales.csv', index=False)
+    old = pd.read_csv('./data/pred_price copy.csv')
+    old['token_id'] = old.token_id.astype(str)
+    old = pred_price.merge(old, on=['collection','token_id'])
+    old['ratio'] = old.pred_price_x / old.pred_price_y
+    old = old.sort_values('ratio')
+    old.columns = [ 'collection', 'token_id', 'nft_rank', 'rk_new', 'pred_price_new', 'pred_sd_x', 'rank', 'rk_old', 'pred_price_old', 'pred_sd_y', 'clean_token_id', 'ratio' ]
+    m = m_df[(m_df.collection.isin(pred_price.collection.unique())) & (-(m_df.feature_name.isin(['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'])))]
+    m_p = m.pivot(['collection','token_id'], ['feature_name'], ['feature_value']).reset_index()
+    m_p.columns = [ 'collection','token_id' ] + sorted(m.feature_name.unique())
+    m_p.head()
+    old = old.merge(m_p, on=['collection','token_id'])
+    old = old[[ 'token_id', 'nft_rank', 'rk_old', 'rk_new', 'pred_price_old', 'pred_price_new', 'ratio' ] + [c for c in m_p.columns if not c in ['token_id','collection']]]
+    old.to_csv('~/Downloads/tmp1.csv', index=False)
+    pred_price.head()
+    old[old.token_id == '4857']
+    old.head()
+    old.tail()
-    cols = metadata.feature_name.unique()
-    cols = [ x for x in cols if not x in (ATT_EXCLUDE_COLS[collection] if collection in ATT_EXCLUDE_COLS.keys() else []) + ALL_NUMERIC_COLS ]
-    exclude = RARITY_EXCLUDE_COLS[collection] if collection in RARITY_EXCLUDE_COLS.keys() else []
-    for c in cols:
-        cur = metadata[metadata.feature_name == c][['collection','token_id','feature_name','feature_value']]
-        l = len(cur.token_id.unique())
-        if c in exclude:
-            cur['rarity'] = None
-        else:
-            g = cur.groupby('feature_value').token_id.count().reset_index()
-            g['rarity'] = g.token_id / l
-            cur = merge(cur, g[['feature_value','rarity']])
-        attributes = attributes.append(cur)
+    # nft_rank = m_df[m_df.feature_name=='nft_rank'][['collection','token_id','feature_value']].rename(columns={'feature_value': 'nft_rank'})
+    # nft_rank['token_id'] = nft_rank.token_id.astype(str)
+    # pred_price['token_id'] = pred_price.token_id.astype(str)
+    # pred_price = pred_price.merge(nft_rank, how='left', on=['collection','token_id'])
+    pred_price.to_csv('./data/pred_price.csv', index=False)
+    # pred_price = pd.read_csv('./data/pred_price.csv')
+    pred_price.groupby('collection')[['pred_price']].min()
+    attributes.to_csv('./data/attributes.csv', index=False)
+    attributes = pd.read_csv('./data/attributes.csv')
+    attributes[attributes.rarity.isnull()]
+    feature_values.to_csv('./data/feature_values.csv', index=False)
-    attributes['feature_name'] = attributes.feature_name.apply(lambda x: re.sub('_', ' ', x).title() )
-    sorted(attributes['feature_name'].unique())
-    if len(feature_values):
-        feature_values['feature_name'] = feature_values.feature_name.apply(lambda x: re.sub('_', ' ', x).title() )
-
-coefsdf.to_csv('./data/coefsdf.csv', index=False)
-salesdf.to_csv('./data/model_sales.csv', index=False)
-old = pd.read_csv('./data/pred_price copy.csv')
-old['token_id'] = old.token_id.astype(str)
-old = pred_price.merge(old, on=['collection','token_id'])
-old['ratio'] = old.pred_price_x / old.pred_price_y
-old = old.sort_values('ratio')
-old.columns = [ 'collection', 'token_id', 'nft_rank', 'rk_new', 'pred_price_new', 'pred_sd_x', 'rank', 'rk_old', 'pred_price_old', 'pred_sd_y', 'clean_token_id', 'ratio' ]
-m = m_df[(m_df.collection.isin(pred_price.collection.unique())) & (-(m_df.feature_name.isin(['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'])))]
-m_p = m.pivot(['collection','token_id'], ['feature_name'], ['feature_value']).reset_index()
-m_p.columns = [ 'collection','token_id' ] + sorted(m.feature_name.unique())
-m_p.head()
-old = old.merge(m_p, on=['collection','token_id'])
-old = old[[ 'token_id', 'nft_rank', 'rk_old', 'rk_new', 'pred_price_old', 'pred_price_new', 'ratio' ] + [c for c in m_p.columns if not c in ['token_id','collection']]]
-old.to_csv('~/Downloads/tmp1.csv', index=False)
-pred_price.head()
-old[old.token_id == '4857']
-old.head()
-old.tail()
-
-pred_price.to_csv('./data/pred_price.csv', index=False)
-attributes.to_csv('./data/attributes.csv', index=False)
-attributes[attributes.rarity.isnull()]
-feature_values.to_csv('./data/feature_values.csv', index=False)
-
-# metadata = pd.read_csv('./data/metadata.csv')
-# metadata['collection'] = metadata.collection.apply(lambda x: clean_name(x))
-# metadata['token_id'] = metadata.token_id.astype(str)
-# metadata.head()
-# nft_rank = pred_price[[ 'collection','token_id','nft_rank' ]].rename(columns={'nft_rank':'feature_value'})
-# nft_rank['feature_name'] = 'nft_rank'
-# metadata = metadata[metadata.feature_name != 'nft_rank']
-# nft_rank = merge(nft_rank, metadata[['collection','chain']].fillna('Solana').drop_duplicates())
-# metadata = metadata.append(nft_rank)
-# metadata.to_csv('./data/metadata.csv', index=False)
+    # metadata = pd.read_csv('./data/metadata.csv')
+    # metadata['collection'] = metadata.collection.apply(lambda x: clean_name(x))
+    # metadata['token_id'] = metadata.token_id.astype(str)
+    # metadata.head()
+    # nft_rank = pred_price[[ 'collection','token_id','nft_rank' ]].rename(columns={'nft_rank':'feature_value'})
+    # nft_rank['feature_name'] = 'nft_rank'
+    # metadata = metadata[metadata.feature_name != 'nft_rank']
+    # nft_rank = merge(nft_rank, metadata[['collection','chain']].fillna('Solana').drop_duplicates())
+    # metadata = metadata.append(nft_rank)
+    # metadata.to_csv('./data/metadata.csv', index=False)
-feature_values.to_csv('./data/feature_values.csv', index=False)
+    feature_values.to_csv('./data/feature_values.csv', index=False)
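The exclusion pass that follows flags sales whose price and model prediction disagree by both a large absolute gap and a large ratio (applied symmetrically in both directions, then unioned), and persists the flagged rows to ./data/exclude.csv so later runs can drop them. A sketch of the rule for one direction:

    def is_outlier(price, pred):
        dff, rat = price - pred, price / pred
        return ((dff >= 20 and rat > 4) or (dff >= 40 and rat > 3)
                or (dff >= 60 and rat > 2.5) or (dff >= 80 and rat > 2))

    print(is_outlier(100, 20))   # True: 80 SOL over and 5x the prediction
    print(is_outlier(100, 90))   # False: gap and ratio both too small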
-if check_exclude:
-    salesdf['rat'] = salesdf.price / salesdf.pred
-    salesdf['dff'] = salesdf.price - salesdf.pred
-    salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int)
-    salesdf['rat'] = salesdf.pred / salesdf.price
-    salesdf['dff'] = salesdf.pred - salesdf.price
-    salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int)
-    salesdf['exclude'] = (salesdf.exclude_1 + salesdf.exclude_2).apply(lambda x: int(x>0))
-    print(salesdf.exclude_1.mean())
-    print(salesdf.exclude_2.mean())
-    print(salesdf.exclude.mean())
-    salesdf[salesdf.token_id == '2239'][['collection','price','exclude']]
-    salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False)
+    if True or check_exclude:
+        exclude = pd.read_csv('./data/exclude.csv')
+        salesdf['rat'] = salesdf.price / salesdf.pred
+        salesdf['dff'] = salesdf.price - salesdf.pred
+        salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2.5)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int)
+        salesdf['rat'] = salesdf.pred / salesdf.price
+        salesdf['dff'] = salesdf.pred - salesdf.price
+        salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2.5)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int)
+        salesdf['exclude'] = (salesdf.exclude_1 + salesdf.exclude_2).apply(lambda x: int(x>0))
+        print(salesdf.exclude_1.mean())
+        print(salesdf.exclude_2.mean())
+        print(salesdf.exclude.mean())
+        salesdf[salesdf.token_id == '2239'][['collection','price','exclude']]
+        exclude = exclude.append(salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]])
+        # salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False)
+        exclude.to_csv('./data/exclude.csv', index=False)
+
+# train_model(True, False)
+# train_model(False, False)
+train_model(False, True)
-train_model(True, False)
-train_model(False, True)
\ No newline at end of file
diff --git a/solana_model_old.py b/solana_model_old.py
index f4002140..a8d6aef0 100644
--- a/solana_model_old.py
+++ b/solana_model_old.py
@@ -1,3 +1,4 @@
+import collections
 import os
 import re
 import json
@@ -5,570 +6,679 @@ import warnings
 import requests
 import numpy as np
 import pandas as pd
+import kutils as ku
 import urllib.request
 import tensorflow as tf
 import snowflake.connector
+
+from curses import meta
+from copy import deepcopy
 from datetime import datetime
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.linear_model import LinearRegression, RidgeCV, Lasso
+from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
+from sklearn.linear_model import LinearRegression, RidgeCV, Lasso, Ridge
 from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV
+os.chdir('/Users/kellenblumberg/git/nft-deal-score')
+from scrape_sol_nfts import clean_name
+
 warnings.filterwarnings('ignore')
-os.chdir('/Users/kellenblumberg/git/nft-deal-score')
-
-CHECK_EXCLUDE = False
-CHECK_EXCLUDE = True
-
-# Using sales from howrare.is - the last sale that was under 300 was when the floor was at 72. Filtering for when the floor is >100, the lowest sale was 400

 ###################################
 # Define Helper Functions #
 ###################################
 def standardize_df(df, cols, usedf=None, verbose=False):
-    for c in cols:
-        if type(usedf) != type(pd.DataFrame()):
-            usedf = df
-        mu = usedf[c].mean()
-        sd = usedf[c].std()
-        if verbose:
-            print(c)
-        if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0:
-            df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 )
-        else:
-            df['std_{}'.format(c)] = (df[c] - mu) / sd
-    return(df)
+    for c in cols:
+        if type(usedf) != type(pd.DataFrame()):
+            usedf = df
+        mu = usedf[c].mean()
+        sd = usedf[c].std()
+        if verbose:
+            print(c)
+        if len(df[c].unique()) == 2 and df[c].max() == 1 and df[c].min() == 0:
+            # df['std_{}'.format(c)] = df[c].apply(lambda x: (x*2) - 1 )
+            df['std_{}'.format(c)] = df[c]
+        else:
+            df['std_{}'.format(c)] = (df[c] - mu) / sd
+    return(df)
+
+def merge(left, right, on=None, how='inner', ensure=True, verbose=True):
+    df = left.merge(right, on=on, how=how)
+    if len(df) != len(left) and (ensure or verbose):
+        print('{} -> {}'.format(len(left), len(df)))
+        cur = left.merge(right, on=on, how='left')
+        cols = set(right.columns).difference(set(left.columns))
+        print(cols)
+        col = list(cols)[0]
+        missing = cur[cur[col].isnull()]
+        print(missing.head())
+        if ensure:
+            assert(False)
+    return(df)

 def just_float(x):
-    x = re.sub('[^\d\.]', '', str(x))
-    return(float(x))
+    x = re.sub('[^\d\.]', '', str(x))
+    return(float(x))

 def calculate_percentages(df, cols=[]):
-    add_pct = not 'pct' in df.columns
-    if not len(cols):
-        cols = df.columns
-    if add_pct:
-        df['pct'] = 1
-    for c in cols:
-        g = df[c].value_counts().reset_index()
-        g.columns = [ c, 'N' ]
-        col = '{}_pct'.format(c)
-        g[col] = g.N / g.N.sum()
-        df = df.merge( g[[ c, col ]] )
-        if add_pct:
-            df['pct'] = df.pct * df[col]
-    return(df)
+    add_pct = not 'pct' in df.columns
+    if not len(cols):
+        cols = df.columns
+    if add_pct:
+        df['pct'] = 1
+    for c in cols:
+        g = df[c].value_counts().reset_index()
+        g.columns = [ c, 'N' ]
+        col = '{}_pct'.format(c)
+        g[col] = g.N / g.N.sum()
+        df = df.merge( g[[ c, col ]] )
+        if add_pct:
+            df['pct'] = df.pct * df[col]
+    return(df)
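The new `merge` helper is an inner join that loudly reports (and, with `ensure=True`, aborts on) row-count changes, which catches silently dropped tokens when join keys do not line up. Typical use, matching how the training code calls it:

    import pandas as pd

    left = pd.DataFrame({'collection': ['SMB'] * 2, 'token_id': ['1', '2'], 'price': [10.0, 12.0]})
    right = pd.DataFrame({'collection': ['SMB'], 'token_id': ['1'], 'nft_rank': [42]})

    df = merge(left, right, on=['collection', 'token_id'], ensure=False)  # prints '2 -> 1' and keeps going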
-exclude = [
-    # (collection, token_id, price)
-    ( 'aurory', 2239, 3500 )
-    # ( 'aurory', 856, 150 )
-    # ( 'aurory', 4715, 500 )
-    # ( 'aurory', 5561, 298 )
-    # ( 'aurory', 5900, 199 )
-    # ( 'aurory', 3323, 138 )
-]
-s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'})
-s_df[ s_df.collection == 'Levana Dragons' ].sort_values('block_timestamp', ascending=0).head()
-print(len(s_df[s_df.collection == 'Levana Dragon Eggs']))
-print(s_df.groupby('collection').token_id.count())
-s_df.collection.unique()
-s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])]
-s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]]
-s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ]
-for e in exclude:
-    s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )]
-s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ]
+def get_sales(check_exclude = True, exclude=[]):
+
+    s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'})
+    s_df['token_id'] = s_df.token_id.astype(str)
+    s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x))
+    s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])]
+    s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ]
+    s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]]
+    for e in exclude:
+        s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )]
+    s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ]
-m_df = pd.read_csv('./data/metadata.csv')
-m_df['token_id'] = m_df.token_id.astype(str)
-tmp = m_df[m_df.collection.isin(['Levana Dragon Eggs','Levana Meteors','Levana Dust'])]
-tmp['tmp'] = tmp.token_id.astype(int)
-tmp.groupby('collection').tmp.max()
-m_df.head()
-# s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(x[:10], '%Y-%m-%d %H:%M:%S') )
-s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') )
-s_df['timestamp'] = s_df.block_timestamp.astype(int)
-# del metadata['price']
-# del metadata['last_sale']
-s_df = s_df.sort_values(['collection','block_timestamp'])
-s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
-s_df = s_df.sort_values(['collection','block_timestamp'])
-s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int)
-s_df[[ 'block_timestamp','days_ago' ]].drop_duplicates(subset=['days_ago'])
+    # exclude weird data points
+    if not check_exclude:
+        exclude = pd.read_csv('./data/exclude.csv')
+        exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x))
+        exclude['token_id'] = exclude.token_id.astype(str)
+        s_df = s_df.merge(exclude, how='left')
+        s_df = s_df[s_df.exclude.isnull()]
+        del s_df['exclude']
-s_df['av_20'] = s_df.groupby('collection')['mn_20'].rolling(20).mean().reset_index(0,drop=True)
-s_df = s_df.sort_values(['collection','block_timestamp'])
-# s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).median().reset_index(0,drop=True)
-s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True)
-# s_df[ (-((s_df.price) >= (s_df.md_20 * 0.2))) & (s_df.price.notnull()) & (s_df.collection == 'Levana Dragon Eggs') ]
+
+    ###########################
+    # Calculate Floor #
+    ###########################
+    s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') )
+    s_df['timestamp'] = s_df.block_timestamp.astype(int)
+    s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int)
-s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.75) ]
-s_df = s_df.sort_values(['collection','block_timestamp'])
-s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
-s_df = s_df.sort_values(['collection','block_timestamp'])
-# s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).min().reset_index(0,drop=True)
-s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True)
-s_df.sort_values(['collection','block_timestamp'])[['price','mn_20','block_timestamp']].head(21).tail(40)
-s_df.sort_values(['collection','block_timestamp'])[['price','mn_20','block_timestamp']].head(20).sort_values('price')
-s_df['tmp'] = s_df.mn_20 / s_df.md_20
+    # lowest price in last 20 sales
+    s_df = s_df.sort_values(['collection','block_timestamp'])
+    s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
+    s_df = s_df.sort_values(['collection','block_timestamp'])
+    s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True)
-tmp = s_df[s_df.collection=='smb'][['mn_20','block_timestamp']]
-tmp['date'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] )
-tmp = tmp.groupby('date').mn_20.median().reset_index()
-tmp.to_csv('~/Downloads/tmp.csv', index=False)
+
+    # exclude sales that are far below the existing floor
+    s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ]
-s_df['tmp'] = s_df.price / s_df.mn_20
-s_df[s_df.collection == 'smb'].sort_values('block_timestamp')[['token_id','price','mn_20']]
-s_df[s_df.collection == 'smb'].sort_values('tmp').head(20)[['collection','token_id','price','mn_20','tmp']]
-s_df.groupby('collection').tmp.median()
-s_df.groupby('collection').tmp.mean()
+
+    # 10%ile of last 20 sales
+    s_df = s_df.sort_values(['collection','block_timestamp'])
+    s_df['mn_20'] = s_df.groupby('collection').price.shift(1)
+    s_df = s_df.sort_values(['collection','block_timestamp'])
+    s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True)
+    s_df['sim'] = 0
+    s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] )
+    s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False)
+    return(s_df)
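So `mn_20` is a per-collection, point-in-time floor: `shift(1)` excludes the current sale, and the 10th percentile of the previous 20 sale prices is used rather than the strict minimum so a single bad print does not drag the floor down. A standalone sketch (window of 3 instead of 20 for the toy data):

    import pandas as pd

    sales = pd.DataFrame({'collection': 'smb', 'price': [10, 11, 9, 30, 10.5, 12]})
    prev = sales.groupby('collection').price.shift(1)   # exclude the sale itself
    floor = prev.rolling(3).quantile(0.1)               # robust rolling floor
    print(pd.concat([sales.price, prev, floor], axis=1))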
-s_df.sort_values('tmp').head()
-s_df['tmp'] = s_df.price / s_df.mn_20
-s_df[['collection','token_id','block_timestamp','price','mn_20','md_20','av_20','tmp']].to_csv('~/Downloads/tmp.csv', index=False)
-s_df.groupby('collection').tmp.median()
-s_df.groupby('collection').tmp.mean()
-s_df.sort_values('tmp', ascending=0).head()
-s_df.head(21)
-m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ]
-# m_df['feature_value'] = m_df.feature_value.apply(lambda x: x.strip() )
-# m_df.feature_value.unique()
-pred_cols = {}
-metadata = {}
-sales = {}
-collection_features = {}
-m_df[(m_df.collection == 'Galactic Punks') & (m_df.feature_name == 'pct')].sort_values('token_id')
-c = 'Levana Dragon Eggs'
-# pred_cols[c]
-EXCLUDE_COLS = {
-    # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
-    'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count']
-}
-NUMERIC_COLS = {
-    'Levana Dragon Eggs': ['rank','score','pct','collection_rank','weight','temperature']
-}
-for c in s_df.collection.unique():
-    print('Building {} model'.format(c))
-    exclude = EXCLUDE_COLS[c] if c in EXCLUDE_COLS.keys() else []
-    n_cols = NUMERIC_COLS[c] if c in NUMERIC_COLS.keys() else []
-    exclude = [ x for x in exclude if not x in n_cols ]
-    o_cols = sorted([x for x in m_df[ m_df.collection == c ].feature_name.unique() if (not x in exclude) and not (x in n_cols) ])
+def train_model(check_exclude, supplement_with_listings):
+    exclude = [
+        ( 'aurory', 2239, 3500 )
+    ]
+    s_df = get_sales(check_exclude, exclude)
+    # s_df = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'})
+    # s_df['collection'] = s_df.collection.apply(lambda x: clean_name(x))
+    # s_df = s_df[-s_df.collection.isin(['Levana Meteors','Levana Dust'])]
+    # s_df = s_df[ -s_df.collection.isin(['boryokudragonz', 'Boryoku Dragonz']) ]
+    # s_df = s_df[[ 'chain','collection','block_timestamp','token_id','price','tx_id' ]]
+    # for e in exclude:
+    #     s_df = s_df[-( (s_df.collection == e[0]) & (s_df.token_id == e[1]) & (s_df.price == e[2]) )]
+    # s_df = s_df[ -((s_df.collection == 'smb') & (s_df.price < 1)) ]
-    sales[c] = s_df[ s_df.collection == c ]
-    pred_cols[c] = sorted( n_cols + o_cols )
-    collection_features[c] = [ c for c in pred_cols[c] if not c in ['score','rank','pct']+exclude ]
-    metadata[c] = m_df[ (m_df.collection == c) & (-(m_df.feature_name.isin(exclude))) ]
+    # # exclude weird data points
+    # if not check_exclude:
+    #     exclude = pd.read_csv('./data/exclude.csv')
+    #     exclude['collection'] = exclude.collection.apply(lambda x: clean_name(x))
+    #     s_df = s_df.merge(exclude, how='left')
+    #     s_df = s_df[s_df.exclude.isnull()]
+    #     del s_df['exclude']
-    # tmp = pd.pivot_table( metadata[c], ['collection','token_id'], columns=['feature_name'], values=['feature_value'] )
-    metadata[c] = metadata[c].pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
-    metadata[c].columns = [ 'collection','token_id' ] + pred_cols[c]
-
-    features = collection_features[c]
-    cur = metadata[c]
-    # cur = cur.dropna(subset=features)
-    for f in features:
-        if type(cur[f].values[0] == str):
-            cur[f] = cur[f].apply(lambda x: re.sub("\"", "", str(x) ) )
-            cur[f] = cur[f].apply(lambda x: re.split("\(", x )[0].strip())
-    cur = cur.replace('', 'Default')
-    # if not 'pct' in cur.columns:
-    cur = calculate_percentages( cur, o_cols )
-    dummies = pd.get_dummies(cur[o_cols])
-    # feature_cols = dummies.columns
-    cur = pd.concat([ cur.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
-    metadata[c] = cur
-    # pred_cols[c] = ['rank','score','timestamp','mn_20','log_mn_20'] + list(dummies.columns)
-    # cols = [ 'collection_rank' ]
-    # cols = [ ]
-    # pred_cols[c] = [ 'rank','transform_rank','score'] + n_cols + [x for x in cols if x in m_df.feature_name.unique()] + list(dummies.columns)
-    # pred_cols[c] = [ 'rank','transform_rank','score'] + n_cols + list(dummies.columns)
-    pred_cols[c] = n_cols + list(dummies.columns)
-
-# collection_features = {
-#     'Hashmasks': [ 'character','eyecolor','item','mask','skincolor' ]
-#     , 'Galactic Punks': [ 'backgrounds','hair','species','suits','jewelry','headware','glasses' ]
-#     , 'Solana Monkey Business': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
-#     , 'Aurory': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
-#     # , 'Thugbirdz': [ 'attribute_count','type','clothes','ears','mouth','eyes','hat','background' ]
-# }
-
-coefsdf = pd.DataFrame()
-salesdf = pd.DataFrame()
-attributes = pd.DataFrame()
-pred_price = pd.DataFrame()
-feature_values = pd.DataFrame()
-collections = sorted(metadata.keys())
-collection = 'Galactic Punks'
-tokens = pd.read_csv('./data/tokens.csv')
-collection = 'Levana Dragon Eggs'
-# for collection in s_df.collection.unique():
-for collection in ['Levana Dragon Eggs']:
-    # collection = 'LunaBulls'
-    # collection = 'smb'
-    # collection = 'aurory'
-    # collection = 'meerkatmillionaires'
-    print('Working on collection {}'.format(collection))
-    p_metadata = metadata[collection]
-    if 'attribute_count' in p_metadata.columns:
-        p_metadata['attribute_count'] = p_metadata.attribute_count.astype(float).astype(int)
-
-    p_sales = sales[collection]
-    # specify the predictive features
-    p_pred_cols = pred_cols[collection]
-    if collection == 'Levana Dragon Eggs':
-        p_pred_cols += [ 'transformed_collection_rank' ]
-    p_features = collection_features[collection]
-    p_sales['token_id'] = p_sales.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
-    p_metadata['token_id'] = p_metadata.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
-    for c in [ 'rank','score' ]:
-        p_metadata[c] = p_metadata[c].astype(float)
-    # p_sales['contract_address'] = p_sales.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
-    # p_metadata['contract_address'] = p_metadata.token_id.apply(lambda x: re.sub("\"", "", str(x)) )
-    p_sales['contract_address'] = ''
-    p_metadata['contract_address'] = ''
-
-    # remove 1 columns for each group (since they are colinear)
-    # exclude = []
-    # for f in p_features:
-    #     e = [ c for c in p_pred_cols if c[:len(f)] == f ][-1]
-    #     exclude.append(e)
-
-    df = p_sales.merge(p_metadata, on=['token_id','contract_address'])
-    df = df[df.mn_20.notnull()]
-    target_col = 'adj_price'
-    df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 )
-    # df['mn_20'] = df.apply(lambda x: min(x[target_col], x['mn_20']), 1 )
-    # tmp = df[['block_timestamp','mn_20']].copy()
-    # tmp['tmp'] = tmp.block_timestamp.apply(lambda x: str(x)[:10] )
-    # tmp = tmp.groupby('tmp').mn_20.median().reset_index()
-    # tmp.sort_values('tmp').to_csv('~/Downloads/tmp.csv', index=False)
-    # df['timestamp'] = df.block_timestamp.astype(int)
-    df = df[df[target_col].notnull()]
-    df = df.reset_index(drop=True)
-    df['transform_rank'] = df['rank'].apply(lambda x: 1.0 / (x**2) )
-    df['rel_price_0'] = df[target_col] - df.mn_20
-    df['rel_price_1'] = df[target_col] / df.mn_20
-    df = df[df.mn_20 > 0]
-    df['log_mn_20'] = np.log(df.mn_20)
-    print('Training on {} sales'.format(len(df)))
-    # df['price_median'] = df.groupby('token_id').price.median()
-
-    # standardize columns to mean 0 sd 1
-    len(p_pred_cols)
-    n_cols = NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []
-    for c in n_cols:
-        df[c] = df[c].apply(lambda x: just_float(x) )
-    if collection == 'Levana Dragon Eggs':
-        df['transformed_collection_rank'] = df.collection_rank.apply(lambda x: (1.0/ x)**2 )
-    df = standardize_df(df, p_pred_cols)
-    std_pred_cols_0 = [ 'std_{}'.format(c) for c in p_pred_cols ]
-    # p_pred_cols = [ c for c in p_pred_cols if not c in exclude ]
-    std_pred_cols = [ 'std_{}'.format(c) for c in p_pred_cols ]
-    df['log_price'] = df[target_col].apply(lambda x: np.log(x) )
-    # df.sort_values('block_timestamp').head(10)[['price','tx_id']]
-    # df.sort_values('block_timestamp').head(10)[['price','tx_id']].tx_id.values
-    # df = df[df.price >= 1]
-
-    #########################
-    # Run the Model #
-    #########################
-    len(df)
-    len(df.dropna(subset=std_pred_cols))
-    tmp = df[std_pred_cols].count().reset_index()
-    tmp.columns = ['a','b']
-    tmp.sort_values('b').head(20)
-    rem = list(tmp[tmp.b==0].a.values)
-    std_pred_cols = [ c for c in std_pred_cols if not c in rem ]
-    mn = df.timestamp.min()
-    mx = df.timestamp.max()
-    df['weight'] = df.timestamp.apply(lambda x: 2.5 ** ((x - mn) / (mx - mn)) )
-    X = df[std_pred_cols].values
-    mu = df.log_price.mean()
-    sd = df.log_price.std()
-    df['std_log_price'] = (df.log_price - mu) / sd
-    # y = df.std_log_price.values
-    # y = df[target_col].values
-    # y = df.rel_price_1.values
-    y_0 = df.rel_price_0.values
-    y_1 = df.rel_price_1.values
-    # y_log = df.log_price.values
-
-    clf_lin = Lasso() if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)])
-    clf_lin.fit(X, y_0, df.weight.values)
-    coefs = []
-    for a, b in zip(std_pred_cols, clf_lin.coef_):
-        coefs += [[a,b]]
-    coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
-    coefs.to_csv('~/Downloads/tmp.csv', index=False)
-    df['pred_lin'] = clf_lin.predict(X)
-    df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20
-    df['err_lin'] = abs(((df.pred_lin - df[target_col]) / df[target_col]) )
-    # df['err_lin'] = abs(df.pred_lin - df.price )
-    # df[[ 'price','pred_lin','err_lin','mn_20' ]].sort_values('err_lin').tail(50)
-    df.head()
-    clf_log = Lasso() if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)])
-    clf_log.fit(X, y_1, df.weight.values)
-    coefs = []
-    for a, b in zip(std_pred_cols, clf_log.coef_):
-        coefs += [[a,b]]
-    coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
-    coefs.to_csv('~/Downloads/tmp.csv', index=False)
-    df['pred_log'] = clf_log.predict(X)
-    df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
-    df['err_log'] = abs(((df.pred_log - df[target_col]) / df[target_col]) )
-    df[[ target_col,'pred_log','err_log','mn_20' ]].sort_values('err_log').tail(50)
-    df['err'] = df.err_lin * df.err_log
-
-    df[[ target_col,'pred_log','err_log','err_lin','err','mn_20' ]].sort_values('err').tail(50)
-    df['collection'] = collection
-
-    # df['pred_lin'] = clf_lin.predict(X)
-    # df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20
-    # df['pred_log'] = np.exp(clf_log.predict(X))
-    # df['pred_log'] = clf_log.predict(X)
-    # df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20
-    clf = LinearRegression(fit_intercept=False)
-    clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.weight.values )
-    print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) ))
-    l = df.sort_values('block_timestamp', ascending=0).mn_20.values[0]
-    tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], l]], columns=['collection','lin_coef','log_coef','floor_price'])
-    if clf.coef_[0] < 0:
-        print('Only using log')
-        df['pred'] = df.pred_log
-        tmp['lin_coef'] = 0
-        tmp['log_coef'] = 1
-    elif clf.coef_[1] < 0:
-        print('Only using lin')
-        df['pred'] = df.pred_lin
-        tmp['lin_coef'] = 1
-        tmp['log_coef'] = 0
-    else:
-        print('Only using BOTH!')
-        df['pred'] = clf.predict( df[['pred_lin','pred_log']].values )
-    coefsdf = coefsdf.append(tmp)
-    df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) )
-    df[df.block_timestamp>='2021-10-01'].sort_values('err', ascending=0).head(10)[[ 'pred',target_col,'token_id','block_timestamp','err','mn_20' ]]
-    # df[df.block_timestamp>='2021-10-01'].err.mean()
-    df.merge(tokens[['collection','token_id','clean_token_id']]).sort_values('err', ascending=0).head(10)[[ 'pred',target_col,'clean_token_id','rank','block_timestamp','err','mn_20','tx_id' ]]
-    df.sort_values('price', ascending=0).head(20)[[ 'price','pred',target_col,'token_id','block_timestamp','err','mn_20','tx_id' ]]
-    df.sort_values('price', ascending=0).tail(40)[[ 'price','pred',target_col,'token_id','block_timestamp','err','mn_20','tx_id' ]]
-    df.sort_values('price', ascending=0).head(20).tx_id.values
-
-    # print(np.mean(y))
-    # print(np.mean(clf.predict(X)))
-
-    # # run neural net
-    # model = tf.keras.models.Sequential([
-    #     tf.keras.layers.Dense(9, activation='relu')
-    #     , tf.keras.layers.Dropout(.2)
-    #     , tf.keras.layers.Dense(3, activation='relu')
-    #     , tf.keras.layers.Dropout(.2)
-    #     , tf.keras.layers.Dense(1, activation='linear')
-    # ])
-    # model.compile(loss='mae', optimizer=tf.keras.optimizers.SGD(learning_rate=0.0025))
-    # model.fit(X, y, epochs=500, validation_split=0.3)
-
-    # df['pred'] = np.exp( (sd * model.predict(df[std_pred_cols].values)) + mu)
-    # df['pred'] =
model.predict(df[std_pred_cols].values) - # ratio = df.price.mean() / df.pred.mean() - # print("Manually increasing predictions by {}%".format(round((ratio-1) * 100, 1))) - - # checking errors - # df['pred'] = df.pred * ratio - df['err'] = df[target_col] - df.pred - df['q'] = df.pred.rank() * 10 / len(df) - df['q'] = df.q.apply(lambda x: int(round(x)) ) - df['pct_err'] = (df[target_col] / df.pred) - 1 - pe_mu = df.pct_err.mean() - pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) ].pct_err.std() - pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) & (df.days_ago<=50) ].pct_err.std() - df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) ) - df['pred_sd'] = df.pred * pe_sd - print(df.groupby('q')[['err','pred',target_col]].mean()) - print(df[df.weight >= df.weight.median()].groupby('q')[['err','pred',target_col]].mean()) - # df.err.mean() - # df[df.weight >= 3.5].err.mean() - df['collection'] = collection - print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2))) - salesdf = salesdf.append( df[[ 'collection','contract_address','token_id','block_timestamp','price','pred','mn_20','rank','score' ]].sort_values('block_timestamp', ascending=0) ) - - # create the attributes dataframe - for f in p_features: - if f and '{}_pct'.format(f) in p_metadata.columns: - cur = p_metadata[[ 'token_id', f, '{}_pct'.format(f) ]] - cur.columns = [ 'token_id', 'value','rarity' ] - cur['feature'] = f - cur['collection'] = collection - attributes = attributes.append(cur) - - # create predictions for each NFT in the collection - test = p_metadata.copy() - for c in n_cols: - test[c] = test[c].apply(lambda x: just_float(x) ) - if collection in [ 'Levana Dragon Eggs' ]: - test['transformed_collection_rank'] = test.collection_rank.apply(lambda x: (1.0 / x) ** 2 ) - tail = df.sort_values('timestamp').tail(1) - for c in [ 'std_timestamp','mn_20','log_mn_20' ]: - if c in tail.columns: - test[c] = tail[c].values[0] - test = standardize_df(test, [c for c in p_pred_cols if not c in ['timestamp'] ], df, True) - # test['pred_lin'] = clf_lin.predict( test[std_pred_cols].values ) - # test['pred_log'] = np.exp(clf_log.predict( test[std_pred_cols].values )) - - test['pred_lin'] = clf_lin.predict(test[std_pred_cols].values) - test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l) - # test['pred_lin'] = df.pred_lin + df.mn_20 - # df['pred_log'] = np.exp(clf_log.predict(X)) - test['pred_log'] = clf_log.predict(test[std_pred_cols].values) - test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l - - test['pred'] = clf.predict( test[[ 'pred_lin','pred_log' ]].values ) - # test['pred'] = np.exp( (sd * model.predict(test[std_pred_cols].values)) + mu) * ratio - test['pred_price'] = test.pred#.apply(lambda x: x*(1+pe_mu) ) - if not CHECK_EXCLUDE: - test['pred_price'] = test.pred.apply(lambda x: (x*0.985) ) - test['pred_sd'] = test.pred * pe_sd - test['rk'] = test.pred.rank(ascending=0, method='first') - test['collection'] = collection - pred_price = pred_price.append( test[[ 'collection', 'contract_address','token_id','rank','rk','pred_price','pred_sd' ] + p_features].rename(columns={'rank':'hri_rank'}).sort_values('pred_price') ) - # print(test[[ 'contract_address','token_id','pred_price','pred_sd' ]].sort_values('pred_price')) + ######################### + # Load Metadata # + ######################### + m_df = pd.read_csv('./data/metadata.csv') + m_df['token_id'] = m_df.token_id.astype(str) + m_df['collection'] = m_df.collection.apply(lambda x: clean_name(x)) + 
m_df['token_id'] = m_df.token_id.astype(str) + # remove ones that are not actually metadata + m_df = m_df[ -m_df.feature_name.isin([ 'price','last_sale','feature_name','feature_value' ]) ] + m_df['feature_value'] = m_df.feature_value.apply(lambda x: re.split("\(", re.sub("\"", "", x))[0] if type(x)==str else x ) + m_df[(m_df.feature_name=='rank') & (m_df.collection == 'Levana Dragon Eggs')] + sorted(m_df[ (m_df.collection == 'Solana Monkey Business') ].feature_name.unique()) - ############################## - # Feature Importance # - ############################## - coefs = [] - for a, b, c in zip(p_pred_cols, clf_lin.coef_, clf_log.coef_): - coefs += [[ collection, a, b, c ]] - coefs = pd.DataFrame(coefs, columns=['collection','col','lin_coef','log_coef']) - # coefs['feature'] = coefs.col.apply(lambda x: ' '.join(re.split('_', x)[:-1]).title() ) - # coefs['feature'] = coefs.col.apply(lambda x: '_'.join(re.split('_', x)[:-1]) ) - # coefs['value'] = coefs.col.apply(lambda x: re.split('_', x)[-1] ) - # mn = coefs.groupby('feature')[[ 'lin_coef','log_coef' ]].min().reset_index() - # mn.columns = [ 'feature','mn_lin_coef','mn_log_coef' ] - # coefs = coefs.merge(mn) - # coefs['lin_coef'] = coefs.lin_coef - coefs.mn_lin_coef - # coefs['log_coef'] = coefs.log_coef - coefs.mn_log_coef - # coefs - # g = attributes[ attributes.collection == collection ][[ 'feature','value','rarity' ]].drop_duplicates() - # g['value'] = g.value.astype(str) - # len(coefs) - # g = coefs.merge(g, how='left') - # g[g.rarity.isnull()] - # len(g) - # coefs = coefs.merge( m_df[ m_df.collection == collection ][[ 'feature_name','' ]] ) - # coefs.sort_values('lin_coef').tail(20) - - # TODO: pick the most common one and have that be the baseline - most_common = attributes[(attributes.collection == collection)].sort_values('rarity', ascending=0).groupby('feature').head(1) - most_common['col'] = most_common.apply(lambda x: 'std_{}_{}'.format( re.sub(' ', '_', x['feature'].lower()), x['value'] ), 1 ) - mc = most_common.col.unique() - data = [] - for c0 in std_pred_cols_0: - if c0 in ['std_rank','std_score','std_pct','std_timestamp','std_mn_20','std_log_mn_20']: - continue - f = '_'.join(re.split('_', c0)[1:-1]) - v = re.split('_', c0)[-1] - rarity = p_metadata[p_metadata['{}_{}'.format(f, v)]==1]['{}_pct'.format(f)].values[0] - # avg = p_metadata['{}_pct'.format(f)].mean() - # avg_pct = df.pct.mean() - # pct_std = ((avg_pct * r / avg) - avg_pct) / df.pct.std() - r = df[df['{}_{}'.format(f, v)]==1].std_rank.mean() - s = df[df['{}_{}'.format(f, v)]==1].std_score.mean() - if r == r and s == s: - datum = [ c0, rarity ] - for c1 in std_pred_cols: - datum.append(1 if c1 == c0 else r if c1 == 'std_rank' else s if c1 == 'std_score' else 1 if c1 in mc else 0 ) - data += [ datum ] - - importance = pd.DataFrame(data, columns=['feature','rarity']+std_pred_cols) - sorted(importance.feature.unique()) - importance[importance.feature == 'std_fur_/_skin_Leopard'] - if 'std_timestamp' in df.columns: - importance['std_timestamp'] = df.std_timestamp.max() - # importance['pred_lin'] = clf_lin.predict( importance[std_pred_cols].values ) - # importance['pred_log'] = np.exp(clf_log.predict( importance[std_pred_cols].values )) - - importance['pred_lin'] = clf_lin.predict(importance[std_pred_cols].values) - importance['pred_lin'] = importance.pred_lin.apply(lambda x: max(0, x) + l) - # importance['pred_lin'] = importance.pred_lin.apply(lambda x: x + l) - importance['pred_log'] = clf_log.predict(importance[std_pred_cols].values) - importance['pred_log'] = 
importance.pred_log.apply(lambda x: max(1, x)) * l - # importance['pred_log'] = importance.pred_log.apply(lambda x: x) * l - - importance['pred'] = clf.predict( importance[[ 'pred_lin','pred_log' ]].values ) - # importance['pred'] = np.exp( (sd * model.predict(importance[std_pred_cols].values)) + mu) - importance = importance.sort_values('pred', ascending=0) - importance.head()[['feature','pred']] - importance[importance.feature == 'std_fur_/_skin_Leopard'] - importance['feature'] = importance.feature.apply(lambda x: re.sub('std_', '', x)) - importance['value'] = importance.feature.apply(lambda x: re.split('_', x)[-1]) - importance['feature'] = importance.feature.apply(lambda x: '_'.join(re.split('_', x)[:-1])) - mn = importance.groupby('feature').pred.min().reset_index().rename(columns={'pred':'baseline'}) - importance = importance.merge(mn) - importance['pred_vs_baseline'] = importance.pred - importance.baseline - importance['pct_vs_baseline'] = (importance.pred / importance.baseline) - 1 - importance[(importance.feature == 'fur_/_skin')].sort_values('pred')[['value','rarity','pred','pred_lin','pred_log','std_rank','std_score']].sort_values('rarity') - importance['collection'] = collection - importance.sort_values('pct_vs_baseline')[['feature','value','pct_vs_baseline']] - tmp = importance[std_pred_cols].mean().reset_index() - tmp.columns = [ 'a', 'b' ] - tmp = tmp.sort_values('b') - feature_values = feature_values.append(importance[['collection','feature','value','pred','pred_vs_baseline','pct_vs_baseline','rarity']]) - -attributes['feature'] = attributes.feature.apply(lambda x: re.sub('_', ' ', x).title() ) -feature_values['feature'] = feature_values.feature.apply(lambda x: re.sub('_', ' ', x).title() ) - -pred_price = pred_price[[ 'collection', 'contract_address', 'token_id', 'hri_rank', 'rk', 'pred_price', 'pred_sd' ]] + ##################################### + # Exclude Special LunaBulls # + ##################################### + tokens = pd.read_csv('./data/tokens.csv') + tokens['collection'] = tokens.collection.apply(lambda x: clean_name(x)) + tokens.token_id.unique() + lunabullsrem = tokens[tokens.clean_token_id>=10000].token_id.unique() + m_df = m_df[ -((m_df.collection == 'LunaBulls') & (m_df.token_id.isin(lunabullsrem))) ] + s_df = s_df[ -((s_df.collection == 'LunaBulls') & (s_df.token_id.isin(lunabullsrem))) ] + s_df = s_df.drop_duplicates(subset=['collection','token_id','price']) -coefsdf.to_csv('./data/coefsdf.csv', index=False) -salesdf.to_csv('./data/model_sales.csv', index=False) -pred_price.to_csv('./data/pred_price.csv', index=False) -attributes.to_csv('./data/attributes.csv', index=False) -feature_values.to_csv('./data/feature_values.csv', index=False) + ########################### + # Calculate Floor # + ########################### + # s_df['block_timestamp'] = s_df.block_timestamp.apply(lambda x: datetime.strptime(str(x)[:19], '%Y-%m-%d %H:%M:%S') if len(x) > 10 else datetime.strptime(x[:10], '%Y-%m-%d') ) + # s_df['timestamp'] = s_df.block_timestamp.astype(int) + # s_df['days_ago'] = s_df.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) -pred_price = pd.read_csv('./data/pred_price.csv') -tokens = pd.read_csv('./data/tokens.csv') -rem = tokens[tokens.clean_token_id>=10000].token_id.unique() -l0 = len(pred_price) -pred_price = pred_price[ -((pred_price.collection == 'LunaBulls') & (pred_price.token_id.isin(rem))) ] -l1 = len(pred_price) -pred_price.to_csv('./data/pred_price.csv', index=False) + # # lowest price in last 20 sales + # s_df = 
s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = s_df.groupby('collection').price.shift(1) + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['md_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.01).reset_index(0,drop=True) -# listings = pd.read_csv('./data/listings.csv') -# listings['token_id'] = listings.token_id.astype(int) + # # exclude sales that are far below the existing floor + # s_df = s_df[ (s_df.price) >= (s_df.md_20 * 0.70) ] -# tmp = salesdf.merge(attributes[ (attributes.collection == 'thugbirdz') & (attributes.feature == 'Position In Gang') & (attributes.value == 'Underboss') ]) -# tmp = pred_price.merge(attributes[ (attributes.collection == 'thugbirdz') & (attributes.feature == 'Position In Gang') & (attributes.value == 'Underboss') ]) -# tmp['token_id'] = tmp.token_id.astype(int) -# tmp = tmp.merge(listings[['collection','token_id','price']]) -# tmp.sort_values('pred_price', ascending=0) + # # 10%ile of last 20 sales + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = s_df.groupby('collection').price.shift(1) + # s_df = s_df.sort_values(['collection','block_timestamp']) + # s_df['mn_20'] = s_df.groupby('collection')['mn_20'].rolling(20).quantile(.1).reset_index(0,drop=True) + # s_df['sim'] = 0 + # s_df['tmp'] = s_df.block_timestamp.apply(lambda x: str(x)[:10] ) + # s_df.groupby(['collection','tmp']).mn_20.mean().reset_index().to_csv('~/Downloads/mn_20.csv', index=False) -if CHECK_EXCLUDE: - salesdf['rat'] = salesdf.price / salesdf.pred - salesdf['dff'] = salesdf.price - salesdf.pred - salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int) - salesdf['rat'] = salesdf.pred / salesdf.price - salesdf['dff'] = salesdf.pred - salesdf.price - salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int) - salesdf['exclude'] = (salesdf.exclude_1 + salesdf.exclude_2).apply(lambda x: int(x>0)) - print(salesdf.exclude_1.mean()) - print(salesdf.exclude_2.mean()) - print(salesdf.exclude.mean()) - salesdf[salesdf.token_id == '2239'][['collection','price','exclude']] - salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False) + if supplement_with_listings: + pred_price = pd.read_csv('./data/pred_price.csv') + pred_price['collection'] = pred_price.collection.apply(lambda x: clean_name(x)) + listings = pd.read_csv('./data/listings.csv') + listings['collection'] = listings.collection.apply(lambda x: clean_name(x)) + listings['block_timestamp'] = s_df.block_timestamp.max() + floor = s_df.sort_values('timestamp').groupby('collection').tail(1)[['collection','mn_20']] + tmp = merge(listings, pred_price, ensure=False) + tmp = tmp[tmp.price < tmp.pred_price] + tmp['timestamp'] = tmp.block_timestamp.astype(int) + tmp['days_ago'] = tmp.block_timestamp.apply(lambda x: (datetime.today() - x).days ).astype(int) + tmp = merge(tmp, floor) -attributes[ (attributes.collection == 'thugbirdz') & (attributes.token_id == '1869') ] -feature_values[ (feature_values.collection == 'thugbirdz') & (feature_values.feature == 'position_in_gang') ] -sorted(feature_values[ (feature_values.collection == 'thugbirdz') ].feature.unique()) + n = round(len(s_df) / 5000) + n = 
max(1, min(2, n))
+ # n = 1
+ for _ in range(n):
+ s_df = s_df.append(tmp[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]])
+ # tmp_1 = tmp[tmp.price <= 0.8 * tmp.pred_price]
+ # s_df = s_df.append(tmp_1[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]])
+ # tmp_2 = tmp[tmp.price <= 0.6 * tmp.pred_price]
+ # tmp_2 = s_df.append(tmp_2[[ 'block_timestamp','timestamp','collection','token_id','price','mn_20' ]])
-pred_price[pred_price.collection == 'peskypenguinclub'].head()
\ No newline at end of file
+
+ ############################
+ # Train Collection Models #
+ ############################
+ coefsdf = pd.DataFrame()
+ salesdf = pd.DataFrame()
+ attributes = pd.DataFrame()
+ pred_price = pd.DataFrame()
+ feature_values = pd.DataFrame()
+ # non-binary in model: collection_rank, temperature, weight
+ # non-binary in model; exclude from rarity: pct, rank, score
+ # exclude from model: lucky_number, shower
+ # exclude from model and rarity %: meteor_id, attribute_count, cracking_date
+ # ALL_NUMERIC_COLS = ['rank','score','pct']
+ ALL_NUMERIC_COLS = ['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2']
+ MODEL_EXCLUDE_COLS = {
+ # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
+ 'Levana Dragon Eggs': ['meteor_id','shower','lucky_number','cracking_date','attribute_count','rarity_score_rank','rarity_score','weight']
+ , 'Solana Monkey Business': ['Clothes_Diamond']
+ }
+ MODEL_INCLUDE_COLS = {
+ # 'Solana Monkey Business': ['std_Hat_Strawhat','std_Hat_Space Warrior Hair','std_Clothes_Diamond','std_Eyes_Solana Vipers','std_Eyes_Vipers','std_Hat_Sombrero','std_Eyes_3D Glasses','std_Hat_Cowboy Hat','std_Eyes_Laser Eyes','std_matching_cop','std_matching_white','std_matching_black']
+ 'Solana Monkey Business': ['std_Hat_Space Warrior Hair','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_Sombrero','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Eyes_Laser Eyes','std_Type_Solana']
+ }
+ RARITY_EXCLUDE_COLS = {
+ # 'Levana Dragon Eggs': ['collection_rank','meteor_id','shower','lucky_number','cracking_date','attribute_count','weight','temperature']
+ 'Levana Dragon Eggs': ['meteor_id','attribute_count','collection_rank','transformed_collection_rank','rarity_score','rarity_score_rank','collection_rank_group']
+ }
+ NUMERIC_COLS = {
+ 'Levana Dragon Eggs': ['collection_rank','temperature','transformed_collection_rank']
+ }
+ ATT_EXCLUDE_COLS = {
+ 'Levana Dragon Eggs': ['attribute_count','transformed_collection_rank','collection_rank_group']
+ }
+ collection = 'Solana Monkey Business'
+ # for collection in s_df.collection.unique():
+ for collection in [ 'Solana Monkey Business' ]:
+ print('Working on collection {}'.format(collection))
+ sales = s_df[ s_df.collection == collection ]
+ metadata = m_df[ m_df.collection == collection ]
+ metadata.groupby(['feature_name','feature_value']).token_id.count().reset_index().to_csv('~/Downloads/tmp.csv', index=False)
+ metadata[metadata.token_id == '1']
+ metadata['feature_name'] = metadata.feature_name.apply(lambda x: x.strip() )
+ metadata[metadata.token_id == '1']
+ metadata[metadata.feature_name == 'rank']
+ metadata.feature_name.unique()
+ metadata[(metadata.token_id=='1') & (metadata.collection == 'Solana Monkey Business')]
+
+ # categorize columns
+ all_names = sorted(metadata.feature_name.unique())
+ model_exclude = MODEL_EXCLUDE_COLS[collection] if collection in MODEL_EXCLUDE_COLS.keys() else []
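+ # Features are split into two groups before modeling: numeric rank-style columns
+ # (nft_rank plus the adj_nft_rank_* variants, and any per-collection NUMERIC_COLS)
+ # stay numeric, while every other attribute is treated as categorical and
+ # one-hot encoded below.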
+ num_features = sorted((NUMERIC_COLS[collection] if collection in NUMERIC_COLS.keys() else []) + ALL_NUMERIC_COLS)
+ num_features = [ x for x in num_features if x in metadata.feature_name.unique() ]
+ num_metadata = metadata[metadata.feature_name.isin(num_features)]
+ num_metadata[num_metadata.feature_name == 'nft_rank']
+ cat_features = sorted([ x for x in all_names if not x in (model_exclude + num_features) ])
+ cat_metadata = metadata[metadata.feature_name.isin(cat_features)]
+
+ # pivot the numeric features to one column per feature
+ num_metadata = num_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
+ num_metadata.columns = [ 'collection','token_id' ] + num_features
+
+ # create dummies for binary variables
+ cat_metadata = cat_metadata.pivot( ['collection','token_id'], ['feature_name'], ['feature_value'] ).reset_index()
+ cat_metadata.columns = [ 'collection','token_id' ] + cat_features
+ cat_metadata = calculate_percentages( cat_metadata, cat_features )
+ dummies = pd.get_dummies(cat_metadata[cat_features])
+ dummies.head(1).to_csv('~/Downloads/tmp2.csv', index=False)
+ if collection == 'Solana Monkey Business':
+ # hand-built interaction features for matching outfit combos
+ dummies['matching_cop'] = ((dummies['Clothes_Cop Vest'] == 1) & (dummies['Hat_Cop Hat'] == 1)).astype(int)
+ dummies['matching_white'] = ((dummies['Clothes_Beige Smoking'] == 1) & ((dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2']) == 1)).astype(int)
+ dummies['matching_black'] = ((dummies['Clothes_Black Smoking'] == 1) & ((dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_Black Top Hat']) == 1)).astype(int)
+ dummies['matching_top'] = ((dummies['matching_black'] == 1) | (dummies['matching_white']== 1)).astype(int)
+ # dummies['matching_green'] = ((dummies['Clothes_Green Smoking'] == 1) & ((dummies['Hat_Green Top Hat']) == 1)).astype(int)
+ # dummies['naked_1_att'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Clothes_None'] == 1)).astype(int)
+ # dummies['naked_1_att_hat'] = ((dummies['Attribute Count_1'] == 1) & (dummies['Hat_None'] == 0)).astype(int)
+ dummies['fedora'] = (dummies['Hat_Black Fedora 1'] + dummies['Hat_Black Fedora 2'] + dummies['Hat_White Fedora 1'] + dummies['Hat_White Fedora 2'] >= 1 ).astype(int)
+ dummies['backwards_cap'] = (dummies['Hat_Black Backwards Cap'] + dummies['Hat_Blue Backwards Cap'] + dummies['Hat_Green Backwards Cap'] + dummies['Hat_Orange Backwards Cap'] + dummies['Hat_Purple Backwards Cap'] + dummies['Hat_Solana Backwards Cap'] >= 1 ).astype(int)
+ del dummies['matching_white']
+ del dummies['matching_black']
+ cat_metadata = pd.concat([ cat_metadata.reset_index(drop=True), dummies.reset_index(drop=True) ], axis=1)
+ del cat_metadata['pct']
+
+ for c in model_exclude:
+ if c in dummies.columns:
+ del dummies[c]
+ pred_cols = num_features + list(dummies.columns)
+
+ # create training df
+ df = merge(sales, num_metadata, ['collection','token_id'], ensure=False)
+ df = merge(df, cat_metadata, ['collection','token_id'])
+ df[df.adj_nft_rank_0 == 'None']
+ df[df.adj_nft_rank_0 == 'None'][['collection','token_id','nft_rank','adj_nft_rank_0']]
+ df.adj_nft_rank_0.unique()
+ for c in num_features:
+ df[c].unique()
+ df[df.nft_rank == 'None']
+ df[df[c] == 'None'][[ 'nft_rank' ]]
+ df[c] = df[c].apply(lambda x: just_float(x))
+ df.sort_values('price', ascending=0)[['price']].head(20)
+ # df.groupby(['rarity','weight']).price.mean()
+
+ # create target cols
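+ # The models train on an adjusted price rather than the raw sale price:
+ # adj_price clips each sale up to roughly 70% of the trailing floor (mn_20,
+ # computed in get_sales), so far-below-floor outliers cannot drag the fit down.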
+ target_col = 'adj_price'
+ df[target_col] = df.apply(lambda x: max(0.7 * (x['mn_20'] - 0.2), x['price']), 1 )
+ df = df[df[target_col].notnull()]
+ df['log_price'] = df[target_col].apply(lambda x: np.log(x) )
+ df['rel_price_0'] = df[target_col] - df.mn_20
+ df['rel_price_1'] = df[target_col] / df.mn_20
+ df = df[df.mn_20 > 0]
+ df['log_mn_20'] = np.log(df.mn_20)
+ print('Training on {} sales'.format(len(df)))
+ df = standardize_df(df, pred_cols)
+
+ std_pred_cols_0 = [ 'std_{}'.format(c) for c in pred_cols ]
+ std_pred_cols = [ 'std_{}'.format(c) for c in pred_cols ]
+
+ #########################
+ # Run the Model #
+ #########################
+ # drop standardized columns that are entirely null
+ tmp = df[std_pred_cols].count().reset_index()
+ tmp.columns = ['a','b']
+ tmp.sort_values('b').head(20)
+ rem = list(tmp[tmp.b==0].a.values)
+ std_pred_cols = [ c for c in std_pred_cols if not c in rem ]
+ if collection == 'Levana Dragon Eggs':
+ std_pred_cols = [ 'std_essence_Dark','std_collection_rank_group_0','std_rarity_Legendary','std_rarity_Rare','std_rarity_Ancient','std_collection_rank','std_transformed_collection_rank' ]
+ # weight recent sales more heavily
+ mn = df.timestamp.min()
+ mx = df.timestamp.max()
+ df['wt'] = df.timestamp.apply(lambda x: 3.0 ** ((x - mn) / (mx - mn)) )
+ if collection == 'Levana Dragon Eggs':
+ df['wt'] = 1
+ # df['wt'] = df.price.apply(lambda x: 1.0 / (x ** 0.9) )
+ # df.sort_values('price', ascending=0)[['price','wt']].head(20)
+ # std_pred_cols = [ 'std_Hat_Crown','std_adj_nft_rank_0','std_Hat_None','std_Eyes_None','std_Clothes_None','std_Attribute Count_4','std_Mouth_None','std_adj_nft_rank_1','std_Type_Dark','std_Ears_None','std_Background_Light purple','std_Hat_Black Fedora 2','std_Hat_White Fedora 2','std_Attribute Count_0','std_Type_Skeleton','std_Attribute Count_2','std_Attribute Count_1','std_Hat_Protagonist Black Hat','std_Clothes_Sailor Vest','std_Mouth_Pipe','std_Hat_Protagonist White Hat','std_Clothes_Pirate Vest','std_Hat_Roman Helmet','std_Type_Solana','std_Clothes_Beige Smoking','std_Hat_Military Helmet','std_Hat_White Fedora 1','std_naked_1_att','std_Type_Zombie','std_Clothes_Roman Armor','std_Eyes_3D Glasses','std_Clothes_Orange Kimono','std_Hat_Green Punk Hair','std_Hat_Sombrero','std_Clothes_Military Vest','std_Hat_Space Warrior Hair','std_Hat_Blue Punk Hair','std_Clothes_Orange Jacket','std_Ears_Earing Silver','std_Eyes_Laser Eyes','std_Eyes_Vipers','std_Type_Alien','std_Type_Red','std_Hat_Admiral Hat' ]
+ # cur_std_pred_cols = [ 'std_adj_nft_rank_0','std_Hat_Crown','std_adj_nft_rank_1','std_Type_Skeleton','std_Type_Alien','std_Clothes_None','std_Eyes_Vipers','std_Hat_Space Warrior Hair','std_Type_Zombie','std_Clothes_Pirate Vest','std_Clothes_Orange Kimono','std_Eyes_Laser Eyes','std_Type_Solana','std_Hat_Ninja Bandana','std_Hat_Solana Backwards Cap','std_Eyes_Solana Vipers','std_Attribute Count_0','std_Attribute Count_1','std_Attribute Count_2','std_Attribute Count_3','std_Attribute Count_5','std_Hat_Strawhat','std_Hat_Admiral Hat','std_matching_top','std_Hat_Sombrero','std_matching_cop','std_Hat_Cowboy Hat','std_Hat_None' ]
+ cur_std_pred_cols = deepcopy(std_pred_cols)
+ g = df[std_pred_cols].sum().reset_index()
+ g.columns = [ 'col','cnt' ]
+ g = g.sort_values('cnt')
+ g.head(20)
+ if collection == 'Solana Monkey Business':
+ # manually correct the rank for token 903
+ df.loc[ df.token_id == '903', 'nft_rank' ] = 18
+ df[df.token_id=='903']
+ df[df.token_id==903]
+ df = df.reset_index(drop=True)
+ X = df[cur_std_pred_cols].values
+ y_0 = df.rel_price_0.values
+ y_1 = df.rel_price_1.values
+ folds = ku.get_folds(len(X), 5)
+ for target_col in [ 'rel_price_0', 'rel_price_1' ]:
+ print('target_col = {}'.format(target_col))
+ y_val = df[target_col].values
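+ # Model-selection sweep: for each candidate target (rel_price_0 = premium over
+ # the floor in SOL, rel_price_1 = multiple of the floor), fit lasso, ridge,
+ # random forest, and gradient-boosted regressors. ku.get_bst_params is taken
+ # here to be the local helper that cross-validates each model over the
+ # precomputed folds and writes its best predictions back onto df.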
+ cur_err = 0
+ for model in ['las','ridge','rfr','gbr']:
+ df, bst_p, bst_r = ku.get_bst_params( model, df, X, y_val, target_col, 'y_pred_{}_{}'.format(model, target_col[-1]), verbose = True, wt_col='wt' )
+ # df['tmp'] = df.collection_rank.apply(lambda x: int((8888 - x)/1000) )
+ # g = df.groupby('tmp').rel_price_0.mean().reset_index()
+ # g['g'] = g.tmp.apply(lambda x: (((1.42**(x**1.42)) - 1) / 20) + 0.13 )
+ # g['g'] = g.tmp.apply(lambda x: 2**x )
+ # g
+
+ # run the linear model
+ # clf_lin = Lasso(alpha=1.0) if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)])
+
+ # clf_lin = Ridge(alpha=1000)
+ # clf_lin = Ridge(alpha=100)
+ # clf_lin.fit(X, y_0, df.wt.values)
+ # clf_las = Lasso(alpha=1.5)
+ # clf_las.fit(X, y_0, df.wt.values)
+ # clf_rfr = RandomForestRegressor()
+ # clf_rfr.fit(X, y_0)
+ # clf_rfr.feature_importances_
+ # imp = []
+ # for a, b, c, d in zip(cur_std_pred_cols, clf_rfr.feature_importances_, clf_lin.coef_, clf_las.coef_):
+ # imp += [[a, b, abs(c), abs(d)]]
+ # imp = pd.DataFrame(imp, columns=['col','imp','lin','las']).sort_values('imp', ascending=0)
+ # imp['imp_rk'] = imp.imp.rank(ascending=0)
+ # imp['lin_rk'] = imp.lin.rank(ascending=0)
+ # imp['las_rk'] = imp.las.rank(ascending=0)
+ # imp['include'] = 0
+ # imp.to_csv('~/Downloads/coef.csv', index=False)
+ # imp.head(50).tail(20)
+ # imp.head(40).tail(10)
+ # imp.head(50).tail(10)
+ # sanity checks on coefficient signs:
+ # nft_rank should be negative
+ # adj_nft_rank_0 should be positive
+ # adj_nft_rank_1 should be positive
+ # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)])
+ # clf_lin = Ridge(alpha=30, fit_intercept=True)
+ clf_lin = Lasso(alpha=.225)
+ def get_coefs(cols, coef):
+ coefs = []
+ for a, b in zip(cols, coef):
+ coefs += [[a,b]]
+ coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0)
+ # coefs.to_csv('~/Downloads/{}_lin_coefs.csv'.format(collection), index=False)
+ coefs['tmp'] = coefs.col.apply(lambda x: 'nft_rank' in x )
+ # coefs['mult'] = coefs.col.apply(lambda x: -1 if x == 'std_nft_rank' else 1 )
+ coefs['mult'] = coefs.apply(lambda x: -1 if x['col'] == 'std_nft_rank' else 1 if x['coef'] >= 0 else -1 , 1 )
+ coefs['val'] = coefs.mult * coefs.coef
+ coefs = coefs.sort_values('val', ascending=0)
+ return(coefs)
+
+ # backward elimination: refit and drop the lowest-value column each pass until
+ # no column has a negative adjusted value and at most 140 columns remain
+ mn = -1
+ print('Starting with {} cols'.format(len(cur_std_pred_cols)))
+ while mn < 0 or len(cur_std_pred_cols) > 140:
+ X = df[cur_std_pred_cols].values
+ clf_lin.fit(X, y_0, df.wt.values)
+ coefs = get_coefs(cur_std_pred_cols, clf_lin.coef_)
+ tmp = coefs[coefs.tmp == True]
+ mn = min(coefs.val) if len(coefs) else 0
+ cur_std_pred_cols.remove(coefs.col.values[-1])
+ coefs.to_csv('~/Downloads/{}_lin_coefs.csv'.format(collection), index=False)
+ len(coefs[coefs.coef !=0])
+ # print(coefs[coefs.coef !=0])
+ # print(len(coefs[coefs.coef !=0]))
+ INCLUDE_COLS = MODEL_INCLUDE_COLS[collection] if collection in MODEL_INCLUDE_COLS.keys() else []
+
+ # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)])
+
+ cur_std_pred_cols = list(coefs[coefs.coef !=0].col.unique())
+ for c in INCLUDE_COLS:
+ if not c in cur_std_pred_cols:
+ cur_std_pred_cols.append(c)
+ lin_std_pred_cols = cur_std_pred_cols
+ X = df[cur_std_pred_cols].values
+ # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)])
+ # clf_lin = Lasso(alpha=0.1)
+ clf_lin = Lasso(alpha=.1)
+ clf_lin.fit(X, y_0, df.wt.values)
+ coefs = get_coefs(cur_std_pred_cols, clf_lin.coef_)
+ print(coefs[coefs.coef !=0])
+ 
print(len(coefs[coefs.coef !=0])) + print(coefs[coefs.col.isin(INCLUDE_COLS)]) + coefs[coefs.coef !=0].to_csv('./data/coefs/{}_lin_coefs.csv'.format(collection), index=False) + df[df['std_Attribute Count_0']!=0] + df['std_Attribute Count_0'].unique() + coefs[coefs.col.isin(INCLUDE_COLS)] + df['pred'] = clf_lin.predict(X) + df['err'] = df.pred - df.rel_price_0 + df[df['std_Hat_Space Warrior Hair'] == 1][['pred',target_col]].mean() + df[df['std_Hat_Space Warrior Hair'] == 1].err.median() + tmp = [] + for c in std_pred_cols: + if len(df[df[c] == 1]): + mu = round(df[df[c] == 1].err.mean()) + md = round(df[df[c] == 1].err.median()) + n = len(df[df[c] == 1]) + tmp += [[ c, int(c in cur_std_pred_cols ), n, mu, md ]] + # print('{}: {}, {}, {}'.format(c, mu, md, n)) + tmp = pd.DataFrame(tmp, columns=['c','i','n','mu','md']).sort_values('mu') + tmp.to_csv('~/Downloads/tmp4.csv', index=False) + tmp[tmp.i == 0].head(8) + tmp[tmp.i == 0].tail(8) + 'std_Hat_Crown','std_Attribute Count_0','std_Hat_Space Warrior Hair','std_Eyes_Laser Eyes','std_Type_Solana','' + df[df['std_Hat_Space Warrior Hair'] == 1].err.mean() + df[df['std_Hat_Strawhat'] == 1][['pred','rel_price_0']].mean() + + df['pred_lin'] = clf_lin.predict(X) + df['pred_lin'] = df.pred_lin.apply(lambda x: max(0, x)) + df.mn_20 + df['err_lin'] = abs(((df.pred_lin - df[target_col]) / df[target_col]) ) + # df[df.genus_Titan==1][['rarity']] + # df[(df.rarity=='Legendary') | (df.genus=='Titan')][['genus','rarity']] + + # run the log model + # clf_log = Lasso(1.0) if collection in [ 'Levana Dragon Eggs' ] else RidgeCV(alphas=[1.5**x for x in range(20)]) + clf_log = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) + clf_log = Ridge(alpha=30) + clf_log = Lasso(0.003) + # clf_lin = RidgeCV(alphas=[1.5**x for x in range(1, 20)]) + + mn = -1 + cur_std_pred_cols = deepcopy(std_pred_cols) + while mn < 0 or len(cur_std_pred_cols) > 140: + X = df[cur_std_pred_cols].values + clf_log.fit(X, y_1, df.wt.values) + coefs = get_coefs(cur_std_pred_cols, clf_log.coef_) + tmp = coefs[coefs.tmp == True] + mn = min(tmp.coef) if len(tmp) else 0 + if mn < 0: + cur_std_pred_cols.remove(tmp.col.values[-1]) + else: + cur_std_pred_cols.remove(coefs.col.values[-1]) + coefs = get_coefs(cur_std_pred_cols, clf_log.coef_) + coefs[coefs.coef !=0].to_csv('./data/coefs/{}_log_coefs.csv'.format(collection), index=False) + # print(coefs[coefs.coef !=0]) + len(coefs[coefs.coef !=0]) + # cur_std_pred_cols = list(coefs[coefs.coef !=0].col.unique()) + for c in INCLUDE_COLS: + if not c in cur_std_pred_cols: + cur_std_pred_cols.append(c) + log_std_pred_cols = cur_std_pred_cols + X = df[cur_std_pred_cols].values + clf_log = Lasso(0.001) + clf_log.fit(X, y_1, df.wt.values) + coefs = get_coefs(cur_std_pred_cols, clf_log.coef_) + print(coefs[coefs.coef !=0]) + print(len(coefs[coefs.coef !=0])) + print(coefs[coefs.col.isin(INCLUDE_COLS)]) + # clf_log.fit(X, y_1, df.wt.values) + # if collection == 'Levana Dragon Eggs': + # coefs = [] + # for a, b in zip(std_pred_cols, clf_lin.coef_): + # coefs += [[a,b]] + # coefs = pd.DataFrame(coefs, columns=['col','coef']).sort_values('coef', ascending=0) + # coefs.to_csv('~/Downloads/levana_log_coefs.csv', index=False) + df['pred_log'] = clf_log.predict(X) + df['pred_log'] = df.pred_log.apply(lambda x: max(1, x)) * df.mn_20 + df['err_log'] = abs(((df.pred_log - df[target_col]) / df[target_col]) ) + df[[ target_col,'pred_log','err_log','mn_20' ]].sort_values('err_log').tail(50) + df['err'] = df.err_lin * df.err_log + + + # combine the models + clf = 
LinearRegression(fit_intercept=False) + clf.fit( df[['pred_lin','pred_log']].values, df[target_col].values, df.wt.values ) + df[['pred_lin','pred_log',target_col]].mean() + print('Price = {} * lin + {} * log'.format( round(clf.coef_[0], 2), round(clf.coef_[1], 2) )) + l = df.sort_values('block_timestamp', ascending=0).mn_20.values[0] + tmp = pd.DataFrame([[collection, clf.coef_[0], clf.coef_[1], l]], columns=['collection','lin_coef','log_coef','floor_price']) + if clf.coef_[0] < 0: + print('Only using log') + df['pred'] = df.pred_log + tmp['lin_coef'] = 0 + tmp['log_coef'] = 1 + elif clf.coef_[1] < 0: + print('Only using lin') + df['pred'] = df.pred_lin + tmp['lin_coef'] = 1 + tmp['log_coef'] = 0 + else: + print('Only using BOTH!') + df['pred'] = clf.predict( df[['pred_lin','pred_log']].values ) + coefsdf = coefsdf.append(tmp) + df['err'] = (df.pred / df[target_col]).apply(lambda x: abs(x-1) ) + + # print out some summary stats + df['err'] = df[target_col] - df.pred + df['q'] = (df.pred.rank() ** 1.5 * .2) / len(df) + df['q'] = df.q.apply(lambda x: int(round(x)) ) + df['pct_err'] = (df[target_col] / df.pred) - 1 + pe_mu = df.pct_err.mean() + pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) ].pct_err.std() + pe_sd = df[ (df.pct_err > -.9) & (df.pct_err < 0.9) & (df.days_ago<=50) ].pct_err.std() + df['pred_price'] = df.pred#.apply(lambda x: x*(1+pe_mu) ) + df['pred_sd'] = df.pred * pe_sd + # print(df.groupby('q')[['err','pred',target_col]].mean()) + print(df[df.wt >= df.wt.median()].groupby('q')[['err','pred',target_col]].mean()) + print(df.groupby('q')[['err','pred',target_col]].mean()) + # df.err.mean() + # df[df.weight >= 3.5].err.mean() + df[df.pred < 200].err.mean() + df['collection'] = collection + print('Avg err last 100: {}'.format(round(df.sort_values('block_timestamp').head(100).err.mean(), 2))) + salesdf = salesdf.append( df.merge(s_df[s_df.sim == 0][['collection','token_id','block_timestamp','price']] )[[ 'collection','token_id','block_timestamp','price','pred','mn_20','nft_rank' ]].sort_values('block_timestamp', ascending=0) ) + + + ############################################################ + # Create Predictions for Each NFT in The Collection # + ############################################################ + test = merge(num_metadata, cat_metadata, ['collection','token_id']) + for c in num_features: + test[c] = test[c].apply(lambda x: just_float(x) ) + tail = df.sort_values('timestamp').tail(1) + test.loc[ test.token_id == '903', 'nft_rank' ] = 18 + test[test.token_id=='903'] + for c in [ 'std_timestamp','mn_20','log_mn_20' ]: + if c in tail.columns: + test[c] = tail[c].values[0] + test = standardize_df(test, pred_cols, df) + + test['pred_lin'] = clf_lin.predict(test[lin_std_pred_cols].values) + test['pred_lin'] = test.pred_lin.apply(lambda x: max(0, x) + l) + test['pred_log'] = clf_log.predict(test[log_std_pred_cols].values) + test['pred_log'] = test.pred_log.apply(lambda x: max(1, x)) * l + + test['pred_price'] = clf.predict( test[[ 'pred_lin','pred_log' ]].values ) + if not check_exclude: + test['pred_price'] = test.pred_price.apply(lambda x: (x*0.985) ) + test['pred_sd'] = test.pred_price * pe_sd + test = test.sort_values(['collection','token_id']) + test['rk'] = test.pred_price.rank(ascending=0, method='first') + test['collection'] = collection + pred_price = pred_price.append( test[[ 'collection','token_id','nft_rank','rk','pred_price','pred_sd' ]].sort_values('pred_price') ) + + cols = metadata.feature_name.unique() + cols = [ x for x in cols if not x in 
(ATT_EXCLUDE_COLS[collection] if collection in ATT_EXCLUDE_COLS.keys() else []) + ALL_NUMERIC_COLS ] + exclude = RARITY_EXCLUDE_COLS[collection] if collection in RARITY_EXCLUDE_COLS.keys() else [] + for c in cols: + cur = metadata[metadata.feature_name == c][['collection','token_id','feature_name','feature_value']] + l = len(cur.token_id.unique()) + if c in exclude: + cur['rarity'] = None + else: + g = cur.groupby('feature_value').token_id.count().reset_index() + g['rarity'] = g.token_id / l + cur = merge(cur, g[['feature_value','rarity']]) + attributes = attributes.append(cur) + + attributes['feature_name'] = attributes.feature_name.apply(lambda x: re.sub('_', ' ', x).title() ) + sorted(attributes['feature_name'].unique()) + if len(feature_values): + feature_values['feature_name'] = feature_values.feature_name.apply(lambda x: re.sub('_', ' ', x).title() ) + + coefsdf.to_csv('./data/coefsdf.csv', index=False) + salesdf.to_csv('./data/model_sales.csv', index=False) + old = pd.read_csv('./data/pred_price copy.csv') + old['token_id'] = old.token_id.astype(str) + old = pred_price.merge(old, on=['collection','token_id']) + old['ratio'] = old.pred_price_x / old.pred_price_y + old = old.sort_values('ratio') + old.columns = [ 'collection', 'token_id', 'nft_rank', 'rk_new', 'pred_price_new', 'pred_sd_x', 'rank', 'rk_old', 'pred_price_old', 'pred_sd_y', 'clean_token_id', 'ratio' ] + m = m_df[(m_df.collection.isin(pred_price.collection.unique())) & (-(m_df.feature_name.isin(['nft_rank','adj_nft_rank_0','adj_nft_rank_1','adj_nft_rank_2'])))] + m_p = m.pivot(['collection','token_id'], ['feature_name'], ['feature_value']).reset_index() + m_p.columns = [ 'collection','token_id' ] + sorted(m.feature_name.unique()) + m_p.head() + old = old.merge(m_p, on=['collection','token_id']) + old = old[[ 'token_id', 'nft_rank', 'rk_old', 'rk_new', 'pred_price_old', 'pred_price_new', 'ratio' ] + [c for c in m_p.columns if not c in ['token_id','collection']]] + old.to_csv('~/Downloads/tmp1.csv', index=False) + pred_price.head() + old[old.token_id == '4857'] + old.head() + old.tail() + + pred_price.to_csv('./data/pred_price.csv', index=False) + attributes.to_csv('./data/attributes.csv', index=False) + attributes[attributes.rarity.isnull()] + feature_values.to_csv('./data/feature_values.csv', index=False) + + # metadata = pd.read_csv('./data/metadata.csv') + # metadata['collection'] = metadata.collection.apply(lambda x: clean_name(x)) + # metadata['token_id'] = metadata.token_id.astype(str) + # metadata.head() + # nft_rank = pred_price[[ 'collection','token_id','nft_rank' ]].rename(columns={'nft_rank':'feature_value'}) + # nft_rank['feature_name'] = 'nft_rank' + # metadata = metadata[metadata.feature_name != 'nft_rank'] + # nft_rank = merge(nft_rank, metadata[['collection','chain']].fillna('Solana').drop_duplicates()) + # metadata = metadata.append(nft_rank) + # metadata.to_csv('./data/metadata.csv', index=False) + + + feature_values.to_csv('./data/feature_values.csv', index=False) + + if check_exclude: + salesdf['rat'] = salesdf.price / salesdf.pred + salesdf['dff'] = salesdf.price - salesdf.pred + salesdf['exclude_1'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | ((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int) + salesdf['rat'] = salesdf.pred / salesdf.price + salesdf['dff'] = salesdf.pred - salesdf.price + salesdf['exclude_2'] = (((salesdf.dff >= 20) & (salesdf.rat > 4)) | ((salesdf.dff >= 40) & (salesdf.rat > 3)) | 
((salesdf.dff >= 60) & (salesdf.rat > 2)) | ((salesdf.dff >= 80) & (salesdf.rat > 2))).astype(int) + salesdf['exclude'] = (salesdf.exclude_1 + salesdf.exclude_2).apply(lambda x: int(x>0)) + print(salesdf.exclude_1.mean()) + print(salesdf.exclude_2.mean()) + print(salesdf.exclude.mean()) + salesdf[salesdf.token_id == '2239'][['collection','price','exclude']] + salesdf[salesdf.exclude == 1][[ 'collection','token_id','price','exclude' ]].to_csv('./data/exclude.csv', index=False) + +train_model(True, False) +train_model(False, True) \ No newline at end of file diff --git a/update.py b/update.py index 0bddaaa9..d6d0a976 100644 --- a/update.py +++ b/update.py @@ -69,20 +69,21 @@ sales.price.max() def add_model_sales(): sales = pd.read_csv('./data/sales.csv').rename(columns={'sale_date':'block_timestamp'}) + print(sales.groupby('collection').token_id.count()) sales.token_id.unique() sales.groupby('collection').token_id.count() sales[sales.collection == 'Galactic Punks'] del sales['tx_id'] - old = pd.read_csv('./data/pred_price copy.csv').rename(columns={'rank':'nft_rank'}) old = pd.read_csv('./data/pred_price.csv').rename(columns={'rank':'nft_rank'}) + old = pd.read_csv('./data/pred_price copy.csv').rename(columns={'rank':'nft_rank'}) old.groupby('collection').token_id.count() sales['token_id'] = sales.token_id.astype(int).astype(str) old['token_id'] = old.token_id.astype(str) sales = sales.merge( old[['collection','token_id','nft_rank']] ) - sales.groupby('collection').token_id.count() sales.head() sales['block_timestamp'] = sales.block_timestamp.apply(lambda x: str(x)[:19] ) sales['price'] = sales.price.apply(lambda x: round(x, 2)) + print(sales.groupby('collection').token_id.count()) sales.to_csv('./data/model_sales.csv', index=False) @@ -105,13 +106,6 @@ def update_token_ids(): df = pd.read_csv('./data/{}.csv'.format(c)) df['token_id'] = df.token_id.apply(lambda x: str(int(float(x))) ) df['tmp'] = df.token_id.apply(lambda x: (str(x)[:5])) - df[(df.collection == 'Galactic Punks') & (df.price == 99)] - df[(df.collection == 'Galactic Punks') & (df.price == 99) & (df.tx_id == 'B57DB0555DED1D9593765EB9EF09796068268B91CF211CC5BF445AA0006205EC')] - df[(df.collection == 'Galactic Punks') & (df.price == 99) & (df.tx_id == 'B57DB0555DED1D9593765EB9EF09796068268B91CF211CC5BF445AA0006205EC')].token_id.values - tokens[(tokens.collection == 'Galactic Punks') ].token_id.values - tokens[(tokens.collection == 'Galactic Punks') & (tokens.token_id == '25984997114855597728010029317878710272') ].token_id.values - tokens[(tokens.token_id == '25984997114855597728010029317878710272') ].token_id.values - tokens[(tokens.token_id == '"25984997114855597728010029317878710272"') ].token_id.values df['tmp'] = df.token_id.apply(lambda x: x[:10] ) tokens['tmp'] = tokens.token_id.apply(lambda x: x[:10] ) len(tokens) @@ -152,5 +146,6 @@ def update_token_ids(): df[df.collection == 'Galactic Punks'] print(df.groupby('collection').token_id.count() ) df.to_csv('./data/{}.csv'.format(c), index=False) -update_token_ids() +update_token_ids() +add_model_sales() \ No newline at end of file diff --git a/viz/global.R b/viz/global.R index 1779ae2c..97c086c9 100644 --- a/viz/global.R +++ b/viz/global.R @@ -11,6 +11,10 @@ library(shinyjs) require(dplyr) library(htmlwidgets) library(reactable) +# library(promises) +# library(future) +# plan(multisession) + plotly.style <- list( plot_bgcolor = "rgba(0, 0, 0, 0)", diff --git a/viz/server.R b/viz/server.R index 31453380..b3cea0ed 100644 --- a/viz/server.R +++ b/viz/server.R @@ -1,6 +1,8 @@ 
server <- function(input, output, session) {
 load('data.Rdata')
+ metadata <- unique(attributes[, list(collection, feature_name, feature_value)])
+
 SD_MULT = 3
 SD_SCALE = 1.95
@@ -56,6 +58,343 @@ server <- function(input, output, session) {
 )
 })
+ output$maxnftrankinput2 <- renderUI({
+ textInput(
+ inputId = 'maxnftrank2'
+ , label = NULL
+ , width = "100%"
+ )
+ })
+ output$minnftrankinput2 <- renderUI({
+ textInput(
+ inputId = 'minnftrank2'
+ , label = NULL
+ , width = "100%"
+ )
+ })
+
+ output$maxrarityrankinput2 <- renderUI({
+ textInput(
+ inputId = 'maxrarityrank2'
+ , label = NULL
+ , width = "100%"
+ )
+ })
+ output$minrarityrankinput2 <- renderUI({
+ textInput(
+ inputId = 'minrarityrank2'
+ , label = NULL
+ , width = "100%"
+ )
+ })
+
+ # the 18 attribute filter dropdowns are identical apart from their index,
+ # so they are generated in one loop instead of 18 copies of the same block
+ lapply(1:18, function(i) {
+ output[[paste0('filter', i, 'select')]] <- renderUI({
+ selected <- getCollection()
+ name <- getMetadataColumns()
+ if(length(name) < i) {
+ return(NULL)
+ }
+ name <- name[i]
+ m <- metadata[ collection == eval(selected) & feature_name == eval(name) ]
+ choices <- c('Any', sort(m$feature_value))
+ selectInput(
+ inputId = paste0('filter', i)
+ , label = NULL
+ , selected = 'Any'
+ , choices = choices
+ , width = "100%"
+ )
+ })
+ })
+
 output$collectionselect <- renderUI({
 choices <- sort(unique(pred_price$collection))
 selectInput(
@@ -128,7 +467,40 @@
 cur_0 <- pred_price[collection == eval(selected) ]
 cur_1 <- cur_0[ token_id == eval(as.numeric(input$tokenid)) ]
 if (nrow(cur_1)) {
- t <- paste0("Market Rank #", format(cur_1$rk[1], big.mark=",")," / ",format(nrow(cur_0), big.mark=","))
+ t <- paste0("Deal Score Rank #", format(cur_1$rk[1], big.mark=",")," / ",format(nrow(cur_0), big.mark=","))
+ }
+ }
+ paste0(t)
+ })
+
+ output$salesAverage <- renderText({
+ data <- getSalesData()
+ t <- ''
+ if (nrow(data)) {
+ p <- format(round(mean(head(data$price, 100)), 1), big.mark=',')
+ f <- format(round(mean(head(data$vs_floor, 100)), 1), big.mark=',')
+ print('p')
+ print(p)
+ print(f)
+ t <- paste0(p, ' $SOL (+',f,' vs the floor)')
+ }
+ paste0(t)
+ })
+
+ output$rarityrank <- renderText({
+ id <- getTokenId()
+ selected <- getCollection()
+ chain <- getChain()
+ t <- ""
+ if( length(id) == 0 | length(selected) == 0 ) {
+ return(t)
+ }
+ if (!is.na(id) & !is.na(selected)) {
+ cur_0 <- pred_price[collection == eval(selected) ]
+ cur_1 <- cur_0[ token_id == eval(as.numeric(input$tokenid)) ]
+ if (nrow(cur_1)) {
+ a <- ifelse( chain == 'Solana', 'HowRare', 'NotFoundTerra' )
+ t <- paste0(a, " Rank #", format(cur_1$nft_rank[1], big.mark=",")," / ",format(nrow(cur_0), big.mark=","))
 }
 }
 paste0(t)
@@ -213,7 +585,7 @@
 return(head(attributes, 0))
 }
 cur <- attributes[ token_id == eval(as.numeric(id)) & collection == eval(selected) ]
- # cur <- merge( cur, feature_values[collection == eval(selected), list(feature_name, feature_value, pred_vs_baseline, pct_vs_baseline) ], all.x=TRUE )
+ cur <- merge( cur, feature_values[collection == eval(selected), list(feature_name, feature_value, pct_vs_baseline) ], all.x=TRUE )
 cur <- cur[order(rarity)]
 # floor <- getFloors()[2]
 # log_coef <- coefsdf[ collection == eval(selected) ]$log_coef[1]
@@ -228,9 +600,9 @@
 # mult <- ratio / s
 # cur[, pct_vs_baseline := pct_vs_baseline * eval(mult) ]
 # }
- cur[, vs_baseline := 0 ]
- cur[, pred_vs_baseline := 0 ]
- cur[, vs_baseline := 0 ]
+ # cur[, vs_baseline := 0 ]
+ # cur[, pred_vs_baseline := 0 ]
+ # cur[, vs_baseline := 0 ]
 # cur[, vs_baseline := round((pred_vs_baseline * eval(lin_coef)) + (pct_vs_baseline * eval(floor) * eval(log_coef) ), 1) ]
 # cur[, pred_vs_baseline := round(pred_vs_baseline, 1) ]
 # cur[, 
@@ -246,7 +618,7 @@ server <- function(input, output, session) {
     # reactable(data[, list( feature, value, rarity, vs_baseline, pred_vs_baseline, pct_vs_baseline )],
     # data <- data[, list( feature, value, rarity, pct_vs_baseline )]
-    data <- data[, list( feature_name, feature_value, rarity )]
+    data <- data[, list( feature_name, feature_value, rarity, pct_vs_baseline )]
     reactable(data,
       defaultColDef = colDef(
         headerStyle = list(background = "#10151A")
@@ -256,16 +628,16 @@ server <- function(input, output, session) {
       outlined = FALSE,
       columns = list(
         feature_name = colDef(name = "Attribute", align = "left"),
-        feature_value = colDef(name = "Value", align = "left"),
-        rarity = colDef(name = "Rarity", align = "left")
-        # pct_vs_baseline = colDef(
-        #   name="Value", header=with_tooltip("Value", "The estimated price impact of this feature vs the floor")
-        #   , html = TRUE
-        #   , align = "left"
-        #   , cell = function(x) {
-        #     htmltools::tags$span(paste0('+', format(round(x*1000)/10, digits=4, decimal.mark=".", big.mark=","), '%'))
-        #   }
-        # )
+        feature_value = colDef(name = "Name", align = "left"),
+        rarity = colDef(name = "Rarity", align = "left"),
+        pct_vs_baseline = colDef(
+          name="General Price Impact", header=with_tooltip("General Price Impact", "The estimated price impact of this feature vs the floor")
+          , html = TRUE
+          , align = "left"
+          , cell = function(x) {
+            htmltools::tags$span(paste0('+', format(round(x*1000)/10, digits=4, decimal.mark=".", big.mark=","), '%'))
+          }
+        )
       )
     )
   })
@@ -328,41 +700,221 @@ server <- function(input, output, session) {
     )
   })
-  output$salestable <- renderReactable({
+  getFilteredSalesData <- function(data, selected, val, i) {
+    if(length(val) > 0) {
+      if(val != 'Any') {
+        att <- getMetadataColumns()
+        if(length(att) >= i) {
+          att <- att[i]
+          include <- attributes[collection == eval(selected) & feature_name == eval(att) & feature_value == eval(val), list(token_id) ]
+          data <- merge(data, include)
+        }
+      }
+    }
+    return(data)
+  }
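+  # e.g. (hypothetical values) getFilteredSalesData(data, 'smb', 'Blue', 1)
+  # inner-joins on token_id and keeps only sales whose first (alphabetical)
+  # metadata feature equals 'Blue'; val == 'Any' or an empty selection
+  # returns data unchanged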
+
+  getSalesDataFn <- function(selected, sales, tokens, pred_price, attributes) {
+    data <- sales[ collection == eval(selected)]
+    m <- pred_price[collection == eval(selected), list(token_id, rk)]
+    data <- merge(data, m, all.x=TRUE)
+
+    data <- merge(data, tokens[collection == eval(selected), list(collection, token_id, image_url)], all.x=T )
+    data <- data[, list( token_id, image_url, block_timestamp, price, pred, mn_20, nft_rank, rk )]
+
+    data <- data[order(-block_timestamp)]
+
+    data[, vs_floor := pmax(0, price - mn_20) ]
+
+    m <- dcast(attributes[collection == eval(selected), list(token_id, feature_name, feature_value)], token_id ~ feature_name, value.var='feature_value')
+    names <- colnames(m)
+    data <- merge(data, m, all.x=TRUE)
+
+    data <- data[order(-block_timestamp)]
+    data[, mn_20 := pmin(mn_20, price) ]
+    data[, mn_20_label := paste0(format(round(mn_20, 1), scientific = FALSE, digits=2, decimal.mark=".", big.mark=","))]
+    data[, price_label := paste0(format(price, scientific = FALSE, digits=2, decimal.mark=".", big.mark=","))]
+    data[, block_timestamp := substr(block_timestamp, 1, 10) ]
+    return(data)
+  }
+
+  getSalesData <- reactive({
     selected <- getCollection()
     if( length(selected) == 0 ) {
       return(NULL)
     }
     # data <- sales[ collection == eval(selected) , list( token_id, block_timestamp, price, pred, mn_20 )]
-    data <- sales[ collection == eval(selected) , list( token_id, block_timestamp, price )]
-    data[, price := paste0(format(price, scientific = FALSE, digits=2, decimal.mark=".", big.mark=","))]
-    # data[, pred := paste0(format(round(pred, 1), scientific = FALSE, digits=2, decimal.mark=".", big.mark=","))]
-
+    data <- sales[ collection == eval(selected)]
     m <- pred_price[collection == eval(selected), list(token_id, rk)]
     data <- merge(data, m, all.x=TRUE)
-    m <- dcast(attributes[collection == eval(selected), list(token_id, feature_name, clean_name)], token_id ~ feature_name, value.var='clean_name')
-    data <- merge(data, m, all.x=TRUE)
+    if(input$maxnftrank2 != '') {
+      r <- as.numeric(input$maxnftrank2)
+      data <- data[ rk <= eval(r) ]
+    }
+    if(input$minnftrank2 != '') {
+      data <- data[ rk >= eval(as.numeric(input$minnftrank2)) ]
+    }
+    if(input$maxrarityrank2 != '') {
+      r <- as.numeric(input$maxrarityrank2)
+      data <- data[ nft_rank <= eval(r) ]
+    }
+    if(input$minrarityrank2 != '') {
+      data <- data[ nft_rank >= eval(as.numeric(input$minrarityrank2)) ]
+    }
+    # apply the attribute dropdowns one at a time; 'Any' or a missing
+    # input (NULL) is a no-op
+    for (i in 1:20) {
+      data <- getFilteredSalesData(data, selected, input[[paste0('filter', i)]], i)
+    }
-    data <- data[order(-block_timestamp)]
+    data <- merge(data, tokens[collection == eval(selected), list(collection, token_id, image_url)], all.x=T )
+    data <- data[, list( token_id, image_url, block_timestamp, price, pred, mn_20, nft_rank, rk )]
-    reactable(data,
-      defaultColDef = colDef(
-        headerStyle = list(background = "#10151A")
-      ),
-      filterable = TRUE,
-      borderless = TRUE,
-      outlined = FALSE,
-      searchable = FALSE,
-      columns = list(
-        token_id = colDef(name = "Token ID", align = "left"),
-        block_timestamp = colDef(name = "Sale Date", align = "left"),
-        price = colDef(name = "Price", align = "left"),
-        # pred = colDef(name = "Fair Market Price", align = "left"),
-        rk = colDef(name = "DS Rank", align = "left")
-        # mn_20 = colDef(name = "Floor Price", align = "left")
-      )
-    )
+    data <- data[order(-block_timestamp)]
+
+    data[, vs_floor := pmax(0, price - mn_20) ]
+
+    m <- dcast(attributes[collection == eval(selected), list(token_id, feature_name, feature_value)], token_id ~ feature_name, value.var='feature_value')
+    names <- colnames(m)
+    data <- merge(data, m, all.x=TRUE)
+
+    data <- data[order(-block_timestamp)]
+    data[, mn_20 := pmin(mn_20, price) ]
+    data[, mn_20_label := paste0(format(round(mn_20, 1), scientific = FALSE, digits=2, decimal.mark=".", big.mark=","))]
+    data[, price_label := paste0(format(price, scientific = FALSE, digits=2, decimal.mark=".", big.mark=","))]
+    data[, block_timestamp := substr(block_timestamp, 1, 10) ]
+    return(data)
+  })
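+  # getSalesDataFn above mirrors this reactive minus the input$ filters,
+  # presumably so it can be handed to future() for async rendering (see the
+  # commented-out call in salestable below)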
+
+  getMetadataColumns <- reactive({
+    selected <- getCollection()
+    m <- unique(metadata[ collection == eval(selected), list(feature_name) ])
+    names <- sort(m$feature_name)
+    return(names)
+  })
+
+  getFilterText <- function(i) {
+    t <- ''
+    m <- getMetadataColumns()
+    if(length(m) >= i) {
+      t <- m[i]
+    }
+    return(t)
+  }
+
+  # filter labels 1-20: the i-th metadata feature name, or '' when the
+  # collection has fewer than i features
+  lapply(1:20, function(i) {
+    output[[paste0('filter', i)]] <- renderText({
+      paste0(getFilterText(i))
+    })
+  })
+
+  output$salestable <- renderReactable({
+    selected <- getCollection()
+    if( length(selected) == 0 ) {
+      return(NULL)
+    }
+    # data <- future(getSalesData()) %...>% head() %>% print()
+    data <- getSalesData()
+    # data <- future(getSalesDataFn(selected, sales, tokens, pred_price, attributes)) %...>%
+    reactable(data,
+      defaultColDef = colDef(
+        headerStyle = list(background = "#10151A")
+      ),
+      # filterable = TRUE,
+      borderless = TRUE,
+      outlined = FALSE,
+      searchable = FALSE,
+      columns = list(
+        token_id = colDef(name = "Token ID", align = "left"),
+        image_url = colDef(name = "Token", align = "left", cell = function(value, index) {
+          if(index <= 100) {
+            htmltools::tags$img(src=value)
+          } else {
+            return(NULL)
+          }
+        }),
+        block_timestamp = colDef(name = "Sale Date", align = "left"),
+        price_label = colDef(name = "Price", align = "left"),
+        pred = colDef(name = "Fair Market Price", align = "left"),
+        rk = colDef(name = "Deal Score Rank", align = "left"),
+        nft_rank = colDef(name = "Rarity Rank", align = "left"),
+        mn_20_label = colDef(name = "Floor Price", align = "left")
+      )
+    )
   })
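+  # Both this table and the listings table cap inline images at the first
+  # 100 rows; the cell function runs for every row when the widget is built,
+  # so skipping <img> tags past 100 presumably avoids fetching thousands of
+  # thumbnails at once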

   getPriceDistributionData <- reactive({
@@ -538,6 +1090,9 @@ server <- function(input, output, session) {
     df <- merge(df, tokens[collection == eval(selected), list(collection, token_id, image_url)] )
     tuple <- getConvertedPrice()
     floors <- getFloors()
+    print('getListingData')
+    print(tuple)
+    print(floors)
     df[, pred_price_0 := pred_price ]
     df[, pred_price := pred_price + eval(tuple[1]) + ( eval(tuple[2]) * pred_price / eval(floors[1]) ) ]
@@ -550,7 +1105,7 @@ server <- function(input, output, session) {
     df[, pred_price := paste0(format(round(pred_price, 1), digits=3, decimal.mark=".", big.mark=",")) ]
     df <- df[, list(image_url, token_id, price, pred_price, deal_score, rk)]
-    m <- dcast(attributes[collection == eval(selected)], collection + token_id ~ feature_name, value.var='clean_name')
+    m <- dcast(attributes[collection == eval(selected)], collection + token_id ~ feature_name, value.var='feature_value')
     df <- merge(df, m, all.x=TRUE)
     df[, collection := NULL]
     df <- df[order(-deal_score)]
@@ -564,6 +1119,8 @@ server <- function(input, output, session) {
     if( nrow(df) == 0 ) {
       return(NULL)
     }
+    print('head(df)')
+    print(head(df))
     df <- df[ deal_score >= 10 ]
     df[, hover_text := paste0('#',token_id,'<br>Listing Price: ',price,'<br>Fair Market Price: ',pred_price,'<br>Deal Score: ',deal_score) ]
     f <- min(df[price > 0]$price)
@@ -706,7 +1263,10 @@ server <- function(input, output, session) {
     if (name == 'solana-monkey-business') name <- 'smb'
     if (name == 'degen-ape-academy') name <- 'degenapes'
     href <- paste0('https://howrare.is/',name,'/',id)
-    url <- span("*Rarity from ", a("howrare.is", href=href)," used in the model")
+    cur_0 <- pred_price[collection == eval(selected) ]
+    cur_1 <- cur_0[ token_id == eval(as.numeric(input$tokenid)) ]
+
+    url <- span("*Rarity from ", a("howrare.is", href=href), paste0(" (rank #", format(cur_1$nft_rank[1], big.mark = ','), ") used in the model"))
     HTML(paste(url))
   })
@@ -755,14 +1315,18 @@ server <- function(input, output, session) {
       filterable = TRUE,
       outlined = FALSE,
       columns = list(
-        image_url = colDef(name = "Token", align = "left", cell = function(value) {
-          htmltools::tags$img(src=value)
+        image_url = colDef(name = "Token", align = "left", cell = function(value, index) {
+          if(index <= 100) {
+            htmltools::tags$img(src=value)
+          } else {
+            return(NULL)
+          }
         }),
        token_id = colDef(name = "Token ID", align = "left"),
        price = colDef(name = "Listed Price", align = "left"),
        pred_price = colDef(name = "Fair Market Price", align = "left"),
        deal_score = colDef(name = "Deal Score", align = "left"),
-        rk = colDef(name = "Market Rank", align = "left")
+        rk = colDef(name = "Deal Score Rank", align = "left")
       ),
       searchable = FALSE
     )
diff --git a/viz/ui.R b/viz/ui.R
index 642f83d9..73588524 100644
--- a/viz/ui.R
+++ b/viz/ui.R
@@ -78,13 +78,16 @@ fluidPage(
     div(class = "subtitle", textOutput("tokenrank", inline=TRUE), icon(class="padding-left-5", id="rank-tooltip", "info-circle") )
     , bsTooltip(id = "rank-tooltip", title = "Dynamic value rank based on the estimated fair market price modeled from historical sales. Model and rank will update periodically as we get more sales data.", placement = "bottom", trigger = "hover")
   )
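+  # second subtitle line: the token's rarity rank (HowRare rank for Solana
+  # collections), rendered by output$rarityrank in server.R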
+  , div(
+    div(class = "subtitle", textOutput("rarityrank", inline=TRUE))
+  )
   , div(class = "link", uiOutput('randomearthurl'))
 )
 , fluidRow(
-  column(6
+  column(5
     , div(class = "token-img", uiOutput("tokenimg"))
   )
-  , column(6, div(
+  , column(7, div(
     class = "table"
     , reactableOutput("attributestable")
     , bsTooltip(id = "value-tooltip", title = "Represents the dollar impact this feature has on the price vs the floor", placement = "bottom", trigger = "hover")
@@ -110,12 +113,11 @@ fluidPage(
   , div(class='description', 'Click a dot to select the token')
 )
 , fluidRow(
-  column(4
+  class = 'filters'
+  , column(4
     , div(
       class = "inputtitle"
       , "Max Price"
-      # , icon(id="floor-price-tooltip", "info-circle")
-      # , bsTooltip(id = "floor-price-tooltip", title = "Update this number to the current floor price of the collection, which will update the rest of the numbers on this page", placement = "bottom", trigger = "hover")
     )
     , fluidRow(uiOutput("maxpriceinput"))
   )
@@ -135,6 +137,126 @@ fluidPage(
   class="grey8row"
   , h2("Historical Sales", icon(class="padding-left-10", id="historical-sales-tooltip", "info-circle"))
   , bsTooltip(id = "historical-sales-tooltip", title = "This app is still in beta - sales data may be incomplete or delayed", placement = "bottom", trigger = "hover")
+  , fluidRow(
+    class = 'filters'
+    , column(3
+      , div(
+        class = "inputtitle"
+        , "Min Deal Score Rank"
+      )
+      , fluidRow(uiOutput("minnftrankinput2"))
+    )
+    , column(3
+      , div(
+        class = "inputtitle"
+        , "Max Deal Score Rank"
+      )
+      , fluidRow(uiOutput("maxnftrankinput2"))
+    )
+    , column(3
+      , div(
+        class = "inputtitle"
+        , "Min Rarity Rank"
+      )
+      , fluidRow(uiOutput("minrarityrankinput2"))
+    )
+    , column(3
+      , div(
+        class = "inputtitle"
+        , "Max Rarity Rank"
+      )
+      , fluidRow(uiOutput("maxrarityrankinput2"))
+    )
+    # one label + dropdown pair per metadata feature (filter1-filter12),
+    # built in a loop; Shiny flattens the list into the row's children
+    , lapply(1:12, function(i) {
+      column(3
+        , div(
+          class = "inputtitle"
+          , textOutput(paste0('filter', i), inline=TRUE)
+        )
+        , fluidRow(uiOutput(paste0('filter', i, 'select')))
+      )
+    })
+  )
+  , div(
+    class = 'padding-bottom-1'
+    , 'Average from most recent 100 sales using these filters: '
+    , textOutput('salesAverage', inline=TRUE)
+  )
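+  # each column(3) spans a quarter of Bootstrap's 12-unit grid, so the rank
+  # inputs and attribute filters above wrap four per row; the table below
+  # shows the rows produced by getSalesData() under those filters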
   , div(class = "table", reactableOutput("salestable"))
   , div(class = "description", 'This app is still in beta - sales data may be incomplete or delayed')
 )
diff --git a/viz/www/styles.css b/viz/www/styles.css
index f3b65bd0..1ec00e2d 100644
--- a/viz/www/styles.css
+++ b/viz/www/styles.css
@@ -232,6 +232,32 @@ input[type=number] {
 }
 
+/*******************/
+/* Filters */
+/*******************/
+.filters > div > .inputtitle {
+  font-weight: 100;
+  font-size: 1.25rem;
+  padding: 0;
+  margin: 0;
+}
+.filters > div > .row > div > .form-group {
+  padding: 0 1rem 1rem 0;
+  margin: 0;
+}
+.filters .form-control, .filters .selectize-input > *, .filters .selectize-dropdown > * {
+  font-weight: 100;
+  font-size: 1.25rem;
+}
+.filters > div {
+  padding: 0;
+  margin: 0;
+}
+
+.filters > div:first-child {
+  padding-left: 0;
+}
+
 /***********************/
 /* React Table */
 /***********************/
@@ -298,6 +324,9 @@ tr {
 /*******************/
 /* General */
 /*******************/
+.padding-bottom-1 {
+  padding-bottom: 1rem;
+}
 .row {
   margin: 0;
 }