* feat: analyze profits - closes #19 Signed-off-by: Arthurim <arthurbdauphine@gmail.com> * feat: analyze profits - closes #19 Signed-off-by: Arthurim <arthurbdauphine@gmail.com> * fix peotry lock Signed-off-by: Arthurim <arthurbdauphine@gmail.com> * feat: add save to csv option Signed-off-by: Arthurim <arthurbdauphine@gmail.com> * fix: update dockerfile to resolve deps automatically at build-time Signed-off-by: Luca Georges Francois <luca@quartz.technology> * feat: add porfit by day Signed-off-by: Arthurim <arthurbdauphine@gmail.com> * feat: add failures saving Signed-off-by: Arthurim <arthurbdauphine@gmail.com> * fix: launch script Signed-off-by: Arthurim <arthurbdauphine@gmail.com> * feat: get rpc url from env Signed-off-by: Arthurim <arthurbdauphine@gmail.com> Signed-off-by: Arthurim <arthurbdauphine@gmail.com> Signed-off-by: Luca Georges Francois <luca@quartz.technology> Co-authored-by: Luca Georges Francois <luca@quartz.technology>
263 lines
9.9 KiB
Python
263 lines
9.9 KiB
Python
import datetime
|
|
import os
|
|
|
|
import pandas as pd
|
|
import web3
|
|
from profit_analysis.block_utils import add_block_timestamp
|
|
from profit_analysis.coingecko import (
|
|
add_cg_ids,
|
|
get_address_to_coingecko_ids_mapping,
|
|
get_coingecko_historical_prices,
|
|
)
|
|
from profit_analysis.column_names import (
|
|
AMOUNT_DEBT_KEY,
|
|
AMOUNT_RECEIVED_KEY,
|
|
CG_ID_DEBT_KEY,
|
|
CG_ID_RECEIVED_KEY,
|
|
DECIMAL_DEBT_KEY,
|
|
PRICE_DEBT_KEY,
|
|
PRICE_KEY,
|
|
PRICE_RECEIVED_KEY,
|
|
TIMESTAMP_KEY,
|
|
TOKEN_DEBT_KEY,
|
|
TOKEN_RECEIVED_KEY,
|
|
)
|
|
from profit_analysis.constants import DATA_PATH
|
|
from profit_analysis.token_utils import get_decimals
|
|
|
|
from mev_inspect.crud.read import read_profit_from_to
|
|
|
|
"""
|
|
Steps:
|
|
1. Given block_from and block_to, read the profit
|
|
"""
|
|
|
|
# Token contract address kept as a module-level constant; it is not referenced
# by any function visible in this file.
# NOTE(review): presumably the wrapped-ETH (WETH) token address on the chain
# being analysed - confirm which network this address belongs to.
WETH_TOKEN_ADDRESS = "0x7ceB23fD6bC0adD59E62ac25578270cFf1b9f619"
|
|
# dtype string that the timestamp columns are cast to (via DataFrame.astype)
# on both sides of the pd.merge_asof joins in get_usd_profit.
PD_DATETIME_FORMAT = "datetime64[ns]"
|
|
|
|
|
|
def analyze_profit(inspect_db_session, block_from, block_to, save_to_csv=False):
    """
    Run the profit analysis pipeline for a range of blocks.

    The profit rows are read with read_profit_from_to, passed through
    add_block_timestamp (using a freshly created web3 client) and add_cg_ids,
    and finally converted by get_usd_profit. The result is printed and returned.

    :param inspect_db_session: session handed to read_profit_from_to
    :param block_from: first bound of the block range handed to read_profit_from_to
    :param block_to: second bound of the block range handed to read_profit_from_to
    :param save_to_csv: bool, forwarded to get_usd_profit
    :return: whatever get_usd_profit returns for the enriched profit data
    """
    raw_profit = read_profit_from_to(inspect_db_session, block_from, block_to)
    # The web3 client is only created after the profit has been read.
    timestamped_profit = add_block_timestamp(create_web3(), raw_profit)
    usd_profit = get_usd_profit(add_cg_ids(timestamped_profit), save_to_csv)
    print(usd_profit)
    return usd_profit
|
|
|
|
|
|
def get_usd_profit(profit, save_to_csv=False):
    """
    For each token involved in mev transactions, will get its price at the time of the transaction and
    compute the profit of each mev transaction.

    Rows are processed per received token (grouped by its coingecko id). Any
    exception raised while processing one token is printed and recorded, and
    the remaining tokens are still processed (best effort).

    :param profit: pd.DataFrame, with columns = ['block_number', 'timestamp', 'transaction_hash',
                                                 'token_debt', 'amount_debt', 'cg_id_debt',
                                                 'token_received', 'amount_received', 'cg_id_received']
    :param save_to_csv: bool, whether to save the analysed profits to csv or not.
                        When True, the computed USD profits are written to
                        DATA_PATH + "usd_profit.csv" and the per-token failures to
                        DATA_PATH + "analyze_profit_failures.csv".
    :return: pd.DataFrame, with columns = ['block_number', 'timestamp', 'date', 'transaction_hash',
                                           'amount_received', 'token_received', 'price_received',
                                           'amount_debt', 'token_debt', 'price_debt',
                                           'profit_usd', 'category' ]
             The frame is empty (but has these columns) when no token could be processed.
    """
    output_columns = [
        "block_number",
        "timestamp",
        "date",
        "transaction_hash",
        "amount_received",
        "token_received",
        "price_received",
        "amount_debt",
        "token_debt",
        "price_debt",
        "profit_usd",
        "category",
    ]
    tokens = profit[CG_ID_RECEIVED_KEY].unique()
    mapping = get_address_to_coingecko_ids_mapping()
    # Per-token results are collected and concatenated once after the loop.
    priced_frames = []
    failures = {}
    # @TODO: What is an optimal value here?
    # looks like sometimes there is no price for hours???
    offset_minutes = 30
    offset = datetime.timedelta(minutes=offset_minutes)

    for token in tokens:
        print("Processing", token)
        try:
            # Work on a copy so the caller's frame is never modified.
            profit_by_received_token = profit.loc[
                profit[CG_ID_RECEIVED_KEY] == token
            ].copy()
            profit_by_received_token[TIMESTAMP_KEY] = pd.to_datetime(
                profit_by_received_token[TIMESTAMP_KEY], format="%Y-%m-%d %H:%M:%S"
            )

            # Price window: transaction time span widened by the offset on both sides.
            dates = pd.to_datetime(profit_by_received_token[TIMESTAMP_KEY].unique())
            date_min = int((dates.min() - offset).timestamp())
            date_max = int((dates.max() + offset).timestamp())

            # get received token prices
            token_prices = get_coingecko_historical_prices(date_min, date_max, token)
            token_prices = token_prices.rename(columns={PRICE_KEY: PRICE_RECEIVED_KEY})
            token_prices[TOKEN_RECEIVED_KEY] = token

            # get received token decimals
            decimals = get_decimals(
                profit_by_received_token[TOKEN_RECEIVED_KEY].values[0]
            )

            # get debt tokens prices
            debt_price_frames = []
            for cg_id_debt in (
                profit_by_received_token[CG_ID_DEBT_KEY].astype(str).unique().tolist()
            ):
                # astype(str) turns a missing id into the string "nan".
                if cg_id_debt == "nan":
                    continue
                debt_token_prices = get_coingecko_historical_prices(
                    date_min, date_max, cg_id_debt
                )
                debt_token_prices[CG_ID_DEBT_KEY] = cg_id_debt
                debt_token_prices[TOKEN_DEBT_KEY] = mapping.loc[
                    mapping[CG_ID_DEBT_KEY] == cg_id_debt, TOKEN_DEBT_KEY
                ].values[0]
                debt_price_frames.append(debt_token_prices)
            if debt_price_frames:
                debt_tokens_prices = pd.concat(debt_price_frames)
            else:
                debt_tokens_prices = pd.DataFrame()
            debt_tokens_prices = debt_tokens_prices.rename(
                columns={PRICE_KEY: PRICE_DEBT_KEY}
            )

            # get debt tokens decimals
            debt_decimals_rows = [
                [debt_token, get_decimals(debt_token)]
                for debt_token in (
                    profit_by_received_token[TOKEN_DEBT_KEY]
                    .astype(str)
                    .unique()
                    .tolist()
                )
                if debt_token != ""
            ]
            debt_tokens_decimals = pd.DataFrame(
                debt_decimals_rows, columns=[TOKEN_DEBT_KEY, DECIMAL_DEBT_KEY]
            )
            profit_by_received_token = profit_by_received_token.merge(
                debt_tokens_decimals, on=TOKEN_DEBT_KEY, how="outer"
            )
            profit_by_received_token.loc[
                pd.isna(profit_by_received_token[AMOUNT_DEBT_KEY]), AMOUNT_DEBT_KEY
            ] = 0

            # apply decimals
            profit_by_received_token[AMOUNT_RECEIVED_KEY] = pd.to_numeric(
                profit_by_received_token[AMOUNT_RECEIVED_KEY]
            ).div(10**decimals)
            # NOTE(review): the debt amount is only made numeric here; the
            # DECIMAL_DEBT_KEY column merged above is never applied to it.
            # Confirm whether amount_debt is already expressed in token units.
            profit_by_received_token[AMOUNT_DEBT_KEY] = pd.to_numeric(
                profit_by_received_token[AMOUNT_DEBT_KEY]
            )

            # set up timestamps for merge
            token_prices[TIMESTAMP_KEY] = pd.to_datetime(token_prices[TIMESTAMP_KEY])

            # merge received token prices
            profit_with_price_token = pd.merge_asof(
                profit_by_received_token.astype({TIMESTAMP_KEY: PD_DATETIME_FORMAT})
                .sort_values(TIMESTAMP_KEY)
                .convert_dtypes(),
                token_prices[[TIMESTAMP_KEY, PRICE_RECEIVED_KEY]]
                .astype({TIMESTAMP_KEY: PD_DATETIME_FORMAT})
                .sort_values(TIMESTAMP_KEY)
                .convert_dtypes(),
                direction="nearest",
                on=TIMESTAMP_KEY,
            )

            if len(debt_tokens_prices) > 0:
                debt_tokens_prices[TIMESTAMP_KEY] = pd.to_datetime(
                    debt_tokens_prices[TIMESTAMP_KEY]
                )
                # merge debt token prices
                # The right frame must carry TOKEN_DEBT_KEY as well: merge_asof
                # needs the `by` column on both sides, otherwise it raises KeyError.
                profit_with_price_token = pd.merge_asof(
                    profit_with_price_token.astype({TIMESTAMP_KEY: PD_DATETIME_FORMAT})
                    .sort_values(TIMESTAMP_KEY)
                    .convert_dtypes(),
                    debt_tokens_prices[[TIMESTAMP_KEY, TOKEN_DEBT_KEY, PRICE_DEBT_KEY]]
                    .astype({TIMESTAMP_KEY: PD_DATETIME_FORMAT})
                    .sort_values(TIMESTAMP_KEY)
                    .convert_dtypes(),
                    direction="nearest",
                    on=TIMESTAMP_KEY,
                    by=TOKEN_DEBT_KEY,
                )
                category = "liquidation"
            else:
                category = "arbitrage"
                profit_with_price_token[PRICE_DEBT_KEY] = 0

            # The category is assigned to the whole received-token group.
            profit_with_price_token["category"] = category
            priced_frames.append(profit_with_price_token)
        except Exception as e:
            # Deliberate best effort: record the failure and keep going.
            # @TODO: save into list to add later
            print(" Failed for token=", token)
            print(e)
            failures[token] = e
    print("Finished processing all tokens")

    if priced_frames:
        profit_with_price_tokens = pd.concat(priced_frames)
        # Rows without a debt leg contribute nothing to the debt side.
        profit_with_price_tokens[PRICE_DEBT_KEY] = profit_with_price_tokens[
            PRICE_DEBT_KEY
        ].fillna(value=0)
        profit_with_price_tokens[AMOUNT_DEBT_KEY] = profit_with_price_tokens[
            AMOUNT_DEBT_KEY
        ].fillna(value=0)
        profit_with_price_tokens["profit_usd"] = (
            profit_with_price_tokens[AMOUNT_RECEIVED_KEY]
            * profit_with_price_tokens[PRICE_RECEIVED_KEY]
            - profit_with_price_tokens[AMOUNT_DEBT_KEY]
            * profit_with_price_tokens[PRICE_DEBT_KEY]
        )
        profit_with_price_tokens = profit_with_price_tokens.reset_index(drop=True)
        profit_with_price_tokens["date"] = profit_with_price_tokens[
            TIMESTAMP_KEY
        ].dt.normalize()
        usd_profit = profit_with_price_tokens[output_columns]
    else:
        # No token could be processed (empty input or every token failed):
        # return an empty frame with the documented columns instead of
        # failing on missing columns.
        usd_profit = pd.DataFrame(columns=output_columns)

    if save_to_csv:
        # Persist the analysed (USD) profits, not the raw input frame.
        usd_profit.to_csv(DATA_PATH + "usd_profit.csv", index=False)
        pd.DataFrame(list(failures.items()), columns=["token", "error"]).to_csv(
            DATA_PATH + "analyze_profit_failures.csv", index=False
        )
    return usd_profit
|
|
|
|
|
|
def get_profit_by(profit_with_price_tokens, col, save_to_csv=False):
    """
    Aggregate the 'profit_usd' column per distinct value of a grouping column.

    :param profit_with_price_tokens: pd.DataFrame containing at least `col` and 'profit_usd'
    :param col: str, name of the column to group by
    :param save_to_csv: bool, when True the aggregate is written to
                        DATA_PATH + "profit_by_" + col + ".csv"
    :return: pd.DataFrame with columns = [col, 'sum', 'mean', 'median', 'count']
    """
    statistics = ["sum", "mean", "median", "count"]
    grouped = profit_with_price_tokens.groupby([col])
    summary = grouped.agg({"profit_usd": statistics}).reset_index()
    # The aggregation yields two-level column labels; keep only the statistic
    # names, which leaves the grouping column with an empty label.
    summary.columns = summary.columns.droplevel(0)
    summary = summary.rename(columns={"": col})
    if not save_to_csv:
        return summary
    file_name = DATA_PATH + "profit_by_" + col + ".csv"
    print(file_name)
    summary.to_csv(file_name, index=False)
    return summary
|
|
|
|
|
|
def create_web3():
    """
    Build a web3 client for the endpoint given by the RPC_URL environment variable.

    The geth_poa_middleware is injected at layer 0 before the connection is checked.

    :return: the connected web3.Web3 instance
    :raises Exception: with message "Failed to connect" when isConnected() is false
    """
    rpc_url = os.environ.get("RPC_URL")
    http_provider = web3.Web3.HTTPProvider(rpc_url)
    client = web3.Web3(http_provider)
    client.middleware_onion.inject(web3.middleware.geth_poa_middleware, layer=0)
    if not client.isConnected():
        raise Exception("Failed to connect")
    return client
|