diff --git a/.gitignore b/.gitignore
index 6cc5bdc..534a495 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,7 @@
 # venv and test cache files
 env/
 __pycache__
+.mypy_cache
 *.swp
+
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..bea15aa
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,20 @@
+repos:
+- repo: https://github.com/ambv/black
+  rev: 20.8b1
+  hooks:
+  - id: black
+    language_version: python3.9
+- repo: local
+  hooks:
+  - id: pylint
+    name: pylint
+    entry: python -m pylint.__main__
+    args: ['--rcfile=.pylintrc', --disable=redefined-builtin]
+    language: system
+    types: [python]
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v0.770
+  hooks:
+  - id: 'mypy'
+    additional_dependencies:
+    - 'pydantic'
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..130a88c
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,503 @@
+[MASTER]
+
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loading into the active Python interpreter and may
+# run arbitrary code.
+extension-pkg-whitelist=pydantic
+
+# Add files or directories to the blacklist. They should be base names, not
+# paths.
+ignore=
+
+# Add files or directories matching the regex patterns to the blacklist. The
+# regex matches against base names, not paths.
+ignore-patterns=
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+
+# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
+# number of processors available to use.
+jobs=1
+
+# Control the amount of potential inferred values when inferring a single
+# object. This can help the performance when dealing with large functions or
+# complex, nested conditions.
+limit-inference-results=100
+
+# List of plugins (as comma separated values of python module names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# Specify a configuration file.
+#rcfile=
+
+# When enabled, pylint would attempt to guess common misconfiguration and emit
+# user-friendly hints instead of false-positive error messages.
+suggestion-mode=yes
+
+# Allow loading of arbitrary C extensions. Extensions are imported into the
+# active Python interpreter and may run arbitrary code.
+unsafe-load-any-extension=no
+
+
+[MESSAGES CONTROL]
+
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
+confidence=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=all
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifier separated by comma (,) or put this option
+# multiple time (only on the command line, not in the configuration file where
+# it should appear only once). See also the "--disable" option for examples.
+enable=c-extension-no-member, imports, variables
+
+
+[REPORTS]
+
+# Python expression which should return a score less than or equal to 10. You
+# have access to the variables 'error', 'warning', 'refactor', and 'convention'
+# which contain the number of messages in each category, as well as 'statement'
+# which is the total number of statements analyzed. This score is used by the
+# global evaluation report (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See doc for all details.
+#msg-template=
+
+# Set the output format. Available formats are text, parseable, colorized, json
+# and msvs (visual studio). You can also give a reporter class, e.g.
+# mypackage.mymodule.MyReporterClass.
+output-format=text
+
+# Tells whether to display a full report or only the messages.
+reports=no
+
+# Activate the evaluation score.
+score=yes
+
+
+[REFACTORING]
+
+# Maximum number of nested blocks for function / method body
+max-nested-blocks=5
+
+# Complete name of functions that never returns. When checking for
+# inconsistent-return-statements if a never returning function is called then
+# it will be considered as an explicit return statement and no message will be
+# printed.
+never-returning-functions=sys.exit
+
+
+[LOGGING]
+
+# Format style used to check logging format string. `old` means using %
+# formatting, `new` is for `{}` formatting,and `fstr` is for f-strings.
+logging-format-style=old
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format.
+logging-modules=logging
+
+
+[SPELLING]
+
+# Limits count of emitted suggestions for spelling mistakes.
+max-spelling-suggestions=4
+
+# Spelling dictionary name. Available dictionaries: none. To make it work,
+# install the python-enchant package.
+spelling-dict=
+
+# List of comma separated words that should not be checked.
+spelling-ignore-words=
+
+# A path to a file that contains the private dictionary; one word per line.
+spelling-private-dict-file=
+
+# Tells whether to store unknown words to the private dictionary (see the
+# --spelling-private-dict-file option) instead of raising a message.
+spelling-store-unknown-words=no
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,
+      XXX,
+      TODO
+
+
+[TYPECHECK]
+
+# List of decorators that produce context managers, such as
+# contextlib.contextmanager. Add to this list to register other decorators that
+# produce valid context managers.
+contextmanager-decorators=contextlib.contextmanager
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E1101 when accessed. Python regular
+# expressions are accepted.
+generated-members=numpy.*,torch.*,spacy.attrs.*
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# Tells whether to warn about missing members when the owner of the attribute
+# is inferred to be None.
+ignore-none=yes
+
+# This flag controls whether pylint should warn about no-member and similar
+# checks whenever an opaque object is returned when inferring. The inference
+# can return multiple potential results while evaluating a Python object, but
+# some branches might not be evaluated, which results in partial inference. In
+# that case, it might be useful to still emit no-member and other checks for
+# the rest of the inferred objects.
+ignore-on-opaque-inference=yes
+
+# List of class names for which member attributes should not be checked (useful
+# for classes with dynamically set attributes). This supports the use of
+# qualified names.
+ignored-classes=optparse.Values,thread._local,_thread._local
+
+# List of module names for which member attributes should not be checked
+# (useful for modules/projects where namespaces are manipulated during runtime
+# and thus existing member attributes cannot be deduced by static analysis). It
+# supports qualified module names, as well as Unix pattern matching.
+ignored-modules=
+
+# Show a hint with possible names when a member name was not found. The aspect
+# of finding the hint is based on edit distance.
+missing-member-hint=yes
+
+# The minimum edit distance a name should have in order to be considered a
+# similar match for a missing member name.
+missing-member-hint-distance=1
+
+# The total number of similar names that should be taken in consideration when
+# showing a hint for a missing member.
+missing-member-max-choices=1
+
+# List of decorators that change the signature of a decorated function.
+signature-mutators=
+
+
+[VARIABLES]
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid defining new builtins when possible.
+additional-builtins=
+
+# Tells whether unused global variables should be treated as a violation.
+allow-global-unused-variables=yes
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,
+          _cb
+
+# A regular expression matching the name of dummy variables (i.e. expected to
+# not be used).
+dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
+
+# Argument names that match this expression will be ignored. Default to name
+# with leading underscore.
+ignored-argument-names=_.*|^ignored_|^unused_
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
+
+
+[FORMAT]
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+
+# Maximum number of characters on a single line.
+max-line-length=100
+
+# Maximum number of lines in a module.
+max-module-lines=1000
+
+# List of optional constructs for which whitespace checking is disabled. `dict-
+# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
+# `trailing-comma` allows a space between comma and closing bracket: (a, ).
+# `empty-line` allows space-only lines.
+no-space-check=trailing-comma,
+               dict-separator
+
+# Allow the body of a class to be on the same line as the declaration if body
+# contains single statement.
+single-line-class-stmt=no
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=no
+
+
+[SIMILARITIES]
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+# Ignore imports when computing similarities.
+ignore-imports=no
+
+# Minimum lines number of a similarity.
+min-similarity-lines=4
+
+
+[BASIC]
+
+# Naming style matching correct argument names.
+argument-naming-style=snake_case
+
+# Regular expression matching correct argument names. Overrides argument-
+# naming-style.
+#argument-rgx=
+
+# Naming style matching correct attribute names.
+attr-naming-style=snake_case
+
+# Regular expression matching correct attribute names. Overrides attr-naming-
+# style.
+#attr-rgx=
+
+# Bad variable names which should always be refused, separated by a comma.
+bad-names=foo,
+          bar,
+          baz,
+          toto,
+          tutu,
+          tata
+
+# Naming style matching correct class attribute names.
+class-attribute-naming-style=any
+
+# Regular expression matching correct class attribute names. Overrides class-
+# attribute-naming-style.
+#class-attribute-rgx=
+
+# Naming style matching correct class names.
+class-naming-style=PascalCase
+
+# Regular expression matching correct class names. Overrides class-naming-
+# style.
+#class-rgx=
+
+# Naming style matching correct constant names.
+const-naming-style=UPPER_CASE
+
+# Regular expression matching correct constant names. Overrides const-naming-
+# style.
+#const-rgx=
+
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=-1
+
+# Naming style matching correct function names.
+function-naming-style=snake_case
+
+# Regular expression matching correct function names. Overrides function-
+# naming-style.
+#function-rgx=
+
+# Good variable names which should always be accepted, separated by a comma.
+good-names=i,
+           j,
+           k,
+           ex,
+           Run,
+           _
+
+# Include a hint for the correct naming format with invalid-name.
+include-naming-hint=no
+
+# Naming style matching correct inline iteration names.
+inlinevar-naming-style=any
+
+# Regular expression matching correct inline iteration names. Overrides
+# inlinevar-naming-style.
+#inlinevar-rgx=
+
+# Naming style matching correct method names.
+method-naming-style=snake_case
+
+# Regular expression matching correct method names. Overrides method-naming-
+# style.
+#method-rgx=
+
+# Naming style matching correct module names.
+module-naming-style=snake_case
+
+# Regular expression matching correct module names. Overrides module-naming-
+# style.
+#module-rgx=
+
+# Colon-delimited sets of names that determine each other's naming style when
+# the name regexes allow several styles.
+name-group=
+
+# Regular expression which should only match function or class names that do
+# not require a docstring.
+no-docstring-rgx=^_
+
+# List of decorators that produce properties, such as abc.abstractproperty. Add
+# to this list to register other decorators that produce valid properties.
+# These decorators are taken in consideration only for invalid-name.
+property-classes=abc.abstractproperty
+
+# Naming style matching correct variable names.
+variable-naming-style=snake_case
+
+# Regular expression matching correct variable names. Overrides variable-
+# naming-style.
+#variable-rgx=
+
+
+[STRING]
+
+# This flag controls whether the implicit-str-concat-in-sequence should
+# generate a warning on implicit string concatenation in sequences defined over
+# several lines.
+check-str-concat-over-line-jumps=no
+
+
+[IMPORTS]
+
+# List of modules that can be imported at any level, not just the top level
+# one.
+allow-any-import-level=
+
+# Allow wildcard imports from modules that define __all__.
+allow-wildcard-with-all=no
+
+# Analyse import fallback blocks. This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks=no
+
+# Deprecated modules which should not be used, separated by a comma.
+deprecated-modules=optparse,tkinter.tix
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled).
+ext-import-graph=
+
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled).
+import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled).
+int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=enchant
+
+# Couples of modules and preferred modules, separated by a comma.
+preferred-modules=
+
+
+[CLASSES]
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,
+                      __new__,
+                      setUp,
+                      __post_init__
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,
+                  _fields,
+                  _replace,
+                  _source,
+                  _make
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls
+
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=cls
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method.
+max-args=5
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Maximum number of boolean expressions in an if statement (see R0916).
+max-bool-expr=5
+
+# Maximum number of branch for function / method body.
+max-branches=12
+
+# Maximum number of locals for function / method body.
+max-locals=15
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+# Maximum number of return / yield for function / method body.
+max-returns=6
+
+# Maximum number of statements in function / method body.
+max-statements=50
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when being caught. Defaults to
+# "BaseException, Exception".
+overgeneral-exceptions=BaseException,
+                       Exception
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..e0d61b5
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.9.4
diff --git a/README.md b/README.md
index d622d16..b804463 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 
 Requirements:
 * python3 and pip3
 
-Instructions:
+Instructions to run:
 * Setup a virtual enviroment to manage dependencies (optional)
   * `python3 -m venv env`
@@ -17,3 +17,13 @@ Instructions:
 * `pip3 install -r requirements.txt`
 * Run tests for token flow
   * `python -m unittest tests/tokenflow_test.py`
+
+If contributing:
+* Install dev libraries
+  * `pip3 install -r requirements_dev.txt`
+* Setup pre-commit
+  * `pre-commit install`
+* Install dependencies and verify it's working
+  * `pre-commit run --all-files`
+  * If you see "failed to find interpreter for..." it means you're missing the correct python version
+  * The current version is python3.9 - [pyenv](https://github.com/pyenv/pyenv) is a great option for managing python versions
diff --git a/mev_inspect/block.py b/mev_inspect/block.py
index 03f1b0f..94b06d5 100644
--- a/mev_inspect/block.py
+++ b/mev_inspect/block.py
@@ -1,4 +1,3 @@
-import json
 from pathlib import Path
 from typing import List
 
@@ -7,7 +6,7 @@ from web3 import Web3
 
 from mev_inspect.schemas import Block, BlockCall, BlockCallType
 
-cache_directory = './cache'
+cache_directory = "./cache"
 
 
 ## Creates a block object, either from the cache or from the chain itself
@@ -16,17 +15,12 @@ cache_directory = './cache'
 def createFromBlockNumber(block_number: int, base_provider) -> Block:
     cache_path = _get_cache_path(block_number)
 
-    if (cache_path.is_file()):
-        print(
-            f'Cache for block {block_number} exists, ' \
-            'loading data from cache'
-        )
+    if cache_path.is_file():
+        print(f"Cache for block {block_number} exists, " "loading data from cache")
 
         return Block.parse_file(cache_path)
     else:
-        print(
-            f"Cache for block {block_number} did not exist, getting data"
-        )
+        print(f"Cache for block {block_number} did not exist, getting data")
 
         w3 = Web3(base_provider)
         block = fetch_block(w3, base_provider, block_number)
@@ -39,36 +33,37 @@ def createFromBlockNumber(block_number: int, base_provider) -> Block:
 def fetch_block(w3, base_provider, block_number: int) -> Block:
     ## Get block data
     block_data = w3.eth.get_block(block_number, True)
-    
+
     ## Get the block receipts
     ## TODO: evaluate whether or not this is sufficient or if gas used needs to be converted to a proper big number.
     ## In inspect-ts it needed to be converted
-    block_receipts_raw = base_provider.make_request("eth_getBlockReceipts", [block_number])
+    block_receipts_raw = base_provider.make_request(
+        "eth_getBlockReceipts", [block_number]
+    )
 
     ## Trace the whole block, return those calls
     block_calls_json = w3.parity.trace_block(block_number)
-    block_calls = [
-        BlockCall(**call_json)
-        for call_json in block_calls_json
-    ]
-    
+    block_calls = [BlockCall(**call_json) for call_json in block_calls_json]
+
     ## Get the logs
     block_hash = (block_data.hash).hex()
-    block_logs = w3.eth.get_logs({'blockHash': block_hash})
+    block_logs = w3.eth.get_logs({"blockHash": block_hash})
 
     ## Get gas used by individual txs and store them too
     txs_gas_data = {}
-    for transaction in block_data['transactions']:
+    for transaction in block_data["transactions"]:
         tx_hash = (transaction.hash).hex()
         tx_data = w3.eth.get_transaction(tx_hash)
         tx_receipt = w3.eth.wait_for_transaction_receipt(tx_hash)
         txs_gas_data[tx_hash] = {
-            'gasUsed': tx_receipt['gasUsed'], # fix: why does this return 0 for certain txs?
-            'gasPrice': tx_data['gasPrice'],
-            'netFeePaid': tx_data['gasPrice'] * tx_receipt['gasUsed']
+            "gasUsed": tx_receipt[
+                "gasUsed"
+            ],  # fix: why does this return 0 for certain txs?
+            "gasPrice": tx_data["gasPrice"],
+            "netFeePaid": tx_data["gasPrice"] * tx_receipt["gasUsed"],
        }
-    
+
     transaction_hashes = get_transaction_hashes(block_calls)
 
     ## Create a new object
@@ -88,7 +83,10 @@ def get_transaction_hashes(calls: List[BlockCall]) -> List[str]:
 
     for call in calls:
         if call.type != BlockCallType.reward:
-            if call.transaction_hash not in result:
+            if (
+                call.transaction_hash is not None
+                and call.transaction_hash not in result
+            ):
                 result.append(call.transaction_hash)
 
     return result
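The `is not None` guard added to `get_transaction_hashes` above matters because parity-style reward traces carry no transaction hash. A minimal, self-contained sketch of the intended behavior, using plain dicts and made-up hashes in place of `BlockCall` objects:

```python
# Sketch of the dedup + None guard in get_transaction_hashes (hypothetical data).
calls = [
    {"type": "call", "transaction_hash": "0xaaa"},  # first trace of a tx
    {"type": "call", "transaction_hash": "0xaaa"},  # another trace, same tx
    {"type": "reward", "transaction_hash": None},   # block reward: no tx hash
]

result = []
for call in calls:
    if call["type"] != "reward":
        tx_hash = call["transaction_hash"]
        if tx_hash is not None and tx_hash not in result:
            result.append(tx_hash)

assert result == ["0xaaa"]  # deduplicated; rewards and None skipped
```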
diff --git a/mev_inspect/inspector_uniswap.py b/mev_inspect/inspector_uniswap.py
index 02d8653..1c04aab 100644
--- a/mev_inspect/inspector_uniswap.py
+++ b/mev_inspect/inspector_uniswap.py
@@ -7,62 +7,72 @@ from mev_inspect.config import load_config
 
 config = load_config()
 
-uniswap_router_abi = json.loads(config['ABI']['UniswapV2Router'])
-uniswap_router_address = (config['ADDRESSES']['UniswapV2Router'])
-sushiswap_router_address = (config['ADDRESSES']['SushiswapV2Router'])
+uniswap_router_abi = json.loads(config["ABI"]["UniswapV2Router"])
+uniswap_router_address = config["ADDRESSES"]["UniswapV2Router"]
+sushiswap_router_address = config["ADDRESSES"]["SushiswapV2Router"]
+
+uniswap_pair_abi = json.loads(config["ABI"]["UniswapV2Pair"])
 
-uniswap_pair_abi = json.loads(config['ABI']['UniswapV2Pair'])
+
 class UniswapInspector:
     def __init__(self, base_provider) -> None:
         self.w3 = Web3(base_provider)
-    
+
         self.trading_functions = self.get_trading_functions()
-        self.uniswap_v2_router_contract = self.w3.eth.contract(abi=uniswap_router_abi, address=uniswap_router_address)
+        self.uniswap_v2_router_contract = self.w3.eth.contract(
+            abi=uniswap_router_abi, address=uniswap_router_address
+        )
         self.uniswap_router_trade_signatures = self.get_router_signatures()
 
         self.uniswap_v2_pair_contract = self.w3.eth.contract(abi=uniswap_pair_abi)
-        self.uniswap_v2_pair_swap_signatures = self.uniswap_v2_pair_contract.functions.swap(0, 0, uniswap_router_address, "").selector ## Note the address here doesn't matter, but it must be filled out
-        self.uniswap_v2_pair_reserves_signatures = self.uniswap_v2_pair_contract.functions.getReserves().selector ## Called "checksigs" in mev-inspect.ts
+        self.uniswap_v2_pair_swap_signatures = (
+            self.uniswap_v2_pair_contract.functions.swap(
+                0, 0, uniswap_router_address, ""
+            ).selector
+        )  ## Note the address here doesn't matter, but it must be filled out
+        self.uniswap_v2_pair_reserves_signatures = (
+            self.uniswap_v2_pair_contract.functions.getReserves().selector
+        )  ## Called "checksigs" in mev-inspect.ts
 
         print("Built Uniswap inspector")
+
     def get_trading_functions(self):
         ## Gets all functions used for swapping
         result = []
-    
+
         ## For each entry in the ABI
         for abi in uniswap_router_abi:
             ## Check to see if the entry is a function and if it is if the function's name starts with swap
-            if abi['type'] == 'function' and abi['name'].startswith('swap'):
+            if abi["type"] == "function" and abi["name"].startswith("swap"):
                 ## If so add it to our array
-                result.append(abi['name'])
-    
+                result.append(abi["name"])
+
         return result
 
     def get_router_signatures(self):
         ## Gets the selector / function signatures of all the router swap functions
         result = []
-    
+
         ## For each entry in the ABI
         for abi in uniswap_router_abi:
             ## Check to see if the entry is a function and if it is if the function's name starts with swap
-            if abi['type'] == 'function' and abi['name'].startswith('swap'):
+            if abi["type"] == "function" and abi["name"].startswith("swap"):
                 ## Add a parantheses
-                function = abi['name'] + '('
-                
+                function = abi["name"] + "("
+
                 ## For each input in the function's input
-                for input in abi['inputs']:
-                
+                for input in abi["inputs"]:
+
                     ## Concat them into a string with commas
-                    function = function + input['internalType'] + ','
+                    function = function + input["internalType"] + ","
 
                 ## Take off the last comma, add a ')' to close the parentheses
-                function = function[:-1] + ')'
+                function = function[:-1] + ")"
 
                 ## The result looks like this: 'swapETHForExactTokens(uint256,address[],address,uint256)'
 
                 ## Take the first 4 bytes of the sha3 hash of the above string.
-                selector = (Web3.sha3(text=function)[0:4])
+                selector = Web3.sha3(text=function)[0:4]
 
                 ## Add that to an array
                 result.append(selector)
@@ -70,12 +80,13 @@ class UniswapInspector:
     def inspect(self, calls):
-        result = []
-
-        trade_calls = []
-
         for call in calls:
-            print('\n',call)
-            if (call['action']['to'] == uniswap_router_address.lower() or call['action']['to'] == sushiswap_router_address.lower()) and utils.check_call_for_signature(call, self.uniswap_router_trade_signatures):
+            print("\n", call)
+            if (
+                call["action"]["to"] == uniswap_router_address.lower()
+                or call["action"]["to"] == sushiswap_router_address.lower()
+            ) and utils.check_call_for_signature(
+                call, self.uniswap_router_trade_signatures
+            ):
                 # print("WIP, here is where there is a call that matches what we are looking for")
                 1 == 1
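For context on `get_router_signatures` above: a selector is the first 4 bytes of the keccak-256 hash of the canonical signature string (`Web3.sha3` used in the code is an older alias of `Web3.keccak`). A small sketch, assuming `web3` is installed, tying this to the hardcoded constants matched in tokenflow.py:

```python
from web3 import Web3

# transfer(address,uint256) hashes to a9059cbb..., so its selector is a9059cbb,
# the constant tokenflow.py matches on.
assert Web3.keccak(text="transfer(address,uint256)")[0:4] == bytes.fromhex("a9059cbb")

# transferFrom(address,address,uint256) gives 23b872dd, the other constant used there.
assert Web3.keccak(text="transferFrom(address,address,uint256)")[0:4] == bytes.fromhex("23b872dd")
```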
diff --git a/mev_inspect/processor.py b/mev_inspect/processor.py
index 5dd96ea..c1a0efb 100644
--- a/mev_inspect/processor.py
+++ b/mev_inspect/processor.py
@@ -9,10 +9,7 @@ class Processor:
     def get_transaction_evaluations(self, block_data):
         for transaction_hash in block_data.transaction_hashes:
             calls = block_data.get_filtered_calls(transaction_hash)
-            calls_json = [
-                to_original_json_dict(call)
-                for call in calls
-            ]
+            calls_json = [to_original_json_dict(call) for call in calls]
 
             for inspector in self.inspectors:
                 inspector.inspect(calls_json)
diff --git a/mev_inspect/schemas/blocks.py b/mev_inspect/schemas/blocks.py
index 18e41b1..b160fcf 100644
--- a/mev_inspect/schemas/blocks.py
+++ b/mev_inspect/schemas/blocks.py
@@ -1,9 +1,6 @@
-import json
 from enum import Enum
 from typing import Dict, List, Optional
 
-from pydantic import BaseModel
-
 from .utils import CamelModel, Web3Model
 
 
@@ -38,7 +35,4 @@ class Block(Web3Model):
     txs_gas_data: Dict[str, dict]
 
     def get_filtered_calls(self, hash: str) -> List[BlockCall]:
-        return [
-            call for call in self.calls
-            if call.transaction_hash == hash
-        ]
+        return [call for call in self.calls if call.transaction_hash == hash]
diff --git a/mev_inspect/schemas/utils.py b/mev_inspect/schemas/utils.py
index a3cb04b..70eaf49 100644
--- a/mev_inspect/schemas/utils.py
+++ b/mev_inspect/schemas/utils.py
@@ -6,9 +6,8 @@ from web3.datastructures import AttributeDict
 
 
 def to_camel(string: str) -> str:
-    return ''.join(
-        word.capitalize() if i > 0 else word
-        for i, word in enumerate(string.split('_'))
+    return "".join(
+        word.capitalize() if i > 0 else word for i, word in enumerate(string.split("_"))
     )
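The reformatted `to_camel` above is what lets the pydantic models accept web3's camelCase keys under snake_case field names. A quick standalone sanity check (the function body is copied verbatim from the diff):

```python
def to_camel(string: str) -> str:
    return "".join(
        word.capitalize() if i > 0 else word for i, word in enumerate(string.split("_"))
    )

# snake_case field names map onto the camelCase keys found in traces/receipts
assert to_camel("transaction_hash") == "transactionHash"
assert to_camel("block_number") == "blockNumber"
```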
diff --git a/mev_inspect/tokenflow.py b/mev_inspect/tokenflow.py
index 3941298..6e08b2f 100644
--- a/mev_inspect/tokenflow.py
+++ b/mev_inspect/tokenflow.py
@@ -5,186 +5,222 @@ from mev_inspect.config import load_config
 
 config = load_config()
 
-rpc_url = config['RPC']['Endpoint']
-weth_address = config['ADDRESSES']['WETH']
+rpc_url = config["RPC"]["Endpoint"]
+weth_address = config["ADDRESSES"]["WETH"]
 
 # w3 = Web3(HTTPProvider(rpc_url))
 
-cache_directory = './cache'
+cache_directory = "./cache"
+
 
 def get_tx_traces(txHash, blockNo):
     # block_calls = w3.parity.trace_block(10803840)
-    cache_file = '{cacheDirectory}/{blockNumber}-new.json'.format(cacheDirectory=cache_directory, blockNumber=blockNo)
+    cache_file = "{cacheDirectory}/{blockNumber}-new.json".format(
+        cacheDirectory=cache_directory, blockNumber=blockNo
+    )
     file_exists = Path(cache_file).is_file()
-    
+
     tx_traces = []
     # if have the traces cached
-    if(file_exists):
+    if file_exists:
         block_file = open(cache_file)
         block_json = json.load(block_file)
-        for call in block_json['calls']:
-            if call['transactionHash'] == txHash:
+        for call in block_json["calls"]:
+            if call["transactionHash"] == txHash:
                 tx_traces.append(call)
         block_file.close()
     else:
         # todo, fetch and cache traces that don't exist
-        # depending on the best way to call block.py from here 
+        # depending on the best way to call block.py from here
         print("traces do not exist")
-
-    return(tx_traces)
+
+    return tx_traces
+
 
 def is_stablecoin_address(address):
     # to look for stablecoin inflow/outflows
     stablecoin_addresses = [
-        "0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48", # USDC
-        "0xdac17f958d2ee523a2206206994597c13d831ec7", # USDT
-        "0x6b175474e89094c44da98b954eedeac495271d0f", # DAI
-        "0x0000000000085d4780b73119b644ae5ecd22b376", # TUSD
-        "0x4fabb145d64652a948d72533023f6e7a623c7c53", # BUSD
-        "0x8e870d67f660d95d5be530380d0ec0bd388289e1", # PAX
-        "0x956F47F50A910163D8BF957Cf5846D573E7f87CA", # FEI
-        "0x853d955aCEf822Db058eb8505911ED77F175b99e", # FRAX
-        "0xBC6DA0FE9aD5f3b0d58160288917AA56653660E9", # alUSD
-        "0x57Ab1ec28D129707052df4dF418D58a2D46d5f51", # sUSD
-        "0x5f98805A4E8be255a32880FDeC7F6728C6568bA0", # lUSD
-        "0x674C6Ad92Fd080e4004b2312b45f796a192D27a0", # USDN
+        "0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48",  # USDC
+        "0xdac17f958d2ee523a2206206994597c13d831ec7",  # USDT
+        "0x6b175474e89094c44da98b954eedeac495271d0f",  # DAI
+        "0x0000000000085d4780b73119b644ae5ecd22b376",  # TUSD
+        "0x4fabb145d64652a948d72533023f6e7a623c7c53",  # BUSD
+        "0x8e870d67f660d95d5be530380d0ec0bd388289e1",  # PAX
+        "0x956F47F50A910163D8BF957Cf5846D573E7f87CA",  # FEI
+        "0x853d955aCEf822Db058eb8505911ED77F175b99e",  # FRAX
+        "0xBC6DA0FE9aD5f3b0d58160288917AA56653660E9",  # alUSD
+        "0x57Ab1ec28D129707052df4dF418D58a2D46d5f51",  # sUSD
+        "0x5f98805A4E8be255a32880FDeC7F6728C6568bA0",  # lUSD
+        "0x674C6Ad92Fd080e4004b2312b45f796a192D27a0",  # USDN
     ]
     return address in stablecoin_addresses
 
+
 def is_known_router_address(address):
     # to exclude known router addresses from token flow analysis
-    known_router_addresses = [
-        "0x3D71d79C224998E608d03C5Ec9B405E7a38505F0", # keeper dao, whitelists extraction
-        "0x11111254369792b2Ca5d084aB5eEA397cA8fa48B", # 1inch v1 router
-        "0x111111125434b319222cdbf8c261674adb56f3ae", # 1inch v2 router
-        "0x11111112542d85b3ef69ae05771c2dccff4faa26", # 1inch v3 router
-        "0xa356867fdcea8e71aeaf87805808803806231fdc", # DODO
-        "0xdef1c0ded9bec7f1a1670819833240f027b25eff", # 0x proxy
-        "0x90f765f63e7dc5ae97d6c576bf693fb6af41c129", # Set Trade
-        "0x7113dd99c79aff93d54cfa4b2885576535a132de", # Totle exchange
-        "0x9509665d015bfe3c77aa5ad6ca20c8afa1d98989", # Paraswap
-        "0x86969d29F5fd327E1009bA66072BE22DB6017cC6", # Paraswap v2
-        "0xf90e98f3d8dce44632e5020abf2e122e0f99dfab", # Paraswap v3
-        "0x57805e5a227937bac2b0fdacaa30413ddac6b8e1", # Furucombo
-        "0x17e8ca1b4798b97602895f63206afcd1fc90ca5f", # Furucombo proxy
-        "0x881d40237659c251811cec9c364ef91dc08d300c", # Metamask swap
-        "0x745daa146934b27e3f0b6bff1a6e36b9b90fb131", # DEX.ag
-        "0xb2be281e8b11b47fec825973fc8bb95332022a54", # Zerion SDK
-        "0x7a250d5630B4cF539739dF2C5dAcb4c659F2488D", # UniswapV2Router02
-        "0xd9e1cE17f2641f24aE83637ab66a2cca9C378B9F", # SushiswapV2Router02
-        "0xE592427A0AEce92De3Edee1F18E0157C05861564", # Uniswap v3 router
-        "0x3E66B66Fd1d0b02fDa6C811Da9E0547970DB2f21", # Balance exchange proxy
-        "0x1bD435F3C054b6e901B7b108a0ab7617C808677b", # Paraswap v4
-        "0xC011a73ee8576Fb46F5E1c5751cA3B9Fe0af2a6F" # SNX proxy synth issuer
+    known_router_addresses = [
+        "0x3D71d79C224998E608d03C5Ec9B405E7a38505F0",  # keeper dao, whitelists extraction
+        "0x11111254369792b2Ca5d084aB5eEA397cA8fa48B",  # 1inch v1 router
+        "0x111111125434b319222cdbf8c261674adb56f3ae",  # 1inch v2 router
+        "0x11111112542d85b3ef69ae05771c2dccff4faa26",  # 1inch v3 router
+        "0xa356867fdcea8e71aeaf87805808803806231fdc",  # DODO
+        "0xdef1c0ded9bec7f1a1670819833240f027b25eff",  # 0x proxy
+        "0x90f765f63e7dc5ae97d6c576bf693fb6af41c129",  # Set Trade
+        "0x7113dd99c79aff93d54cfa4b2885576535a132de",  # Totle exchange
+        "0x9509665d015bfe3c77aa5ad6ca20c8afa1d98989",  # Paraswap
+        "0x86969d29F5fd327E1009bA66072BE22DB6017cC6",  # Paraswap v2
+        "0xf90e98f3d8dce44632e5020abf2e122e0f99dfab",  # Paraswap v3
+        "0x57805e5a227937bac2b0fdacaa30413ddac6b8e1",  # Furucombo
+        "0x17e8ca1b4798b97602895f63206afcd1fc90ca5f",  # Furucombo proxy
+        "0x881d40237659c251811cec9c364ef91dc08d300c",  # Metamask swap
+        "0x745daa146934b27e3f0b6bff1a6e36b9b90fb131",  # DEX.ag
+        "0xb2be281e8b11b47fec825973fc8bb95332022a54",  # Zerion SDK
+        "0x7a250d5630B4cF539739dF2C5dAcb4c659F2488D",  # UniswapV2Router02
+        "0xd9e1cE17f2641f24aE83637ab66a2cca9C378B9F",  # SushiswapV2Router02
+        "0xE592427A0AEce92De3Edee1F18E0157C05861564",  # Uniswap v3 router
+        "0x3E66B66Fd1d0b02fDa6C811Da9E0547970DB2f21",  # Balance exchange proxy
+        "0x1bD435F3C054b6e901B7b108a0ab7617C808677b",  # Paraswap v4
+        "0xC011a73ee8576Fb46F5E1c5751cA3B9Fe0af2a6F",  # SNX proxy synth issuer
     ]
     return address in known_router_addresses
 
+
 # we're interested in the to address to run token flow on it as well
 def get_tx_to_address(txHash, blockNo):
-    cache_file = '{cacheDirectory}/{blockNumber}-new.json'.format(cacheDirectory=cache_directory, blockNumber=blockNo)
+    cache_file = "{cacheDirectory}/{blockNumber}-new.json".format(
+        cacheDirectory=cache_directory, blockNumber=blockNo
+    )
     block_file = open(cache_file)
     block_json = json.load(block_file)
-    for receipt in block_json['receipts']['result']:
-        if receipt['transactionHash'] == txHash:
+    for receipt in block_json["receipts"]["result"]:
+        if receipt["transactionHash"] == txHash:
             block_file.close()
-            return receipt['to']
-    
+            return receipt["to"]
+
 
 def get_tx_proxies(tx_traces, to_address):
     proxies = []
     for trace in tx_traces:
-        if(trace['type'] == 'call' and trace['action']['callType'] == 'delegatecall' and trace['action']['from'] == to_address):
-            proxies.append(trace['action']['to'])
-    return(proxies)
+        if (
+            trace["type"] == "call"
+            and trace["action"]["callType"] == "delegatecall"
+            and trace["action"]["from"] == to_address
+        ):
+            proxies.append(trace["action"]["to"])
+    return proxies
+
 
 def get_net_gas_used(txHash, blockNo):
-    cache_file = '{cacheDirectory}/{blockNumber}.json'.format(cacheDirectory=cache_directory, blockNumber=blockNo)
+    cache_file = "{cacheDirectory}/{blockNumber}.json".format(
+        cacheDirectory=cache_directory, blockNumber=blockNo
+    )
     block_file = open(cache_file)
     block_json = json.load(block_file)
     gas_used = 0
-    for trace in block_json['calls']:
-        if trace['transactionHash'] == txHash:
-            gas_used = gas_used + int(trace['result']['gasUsed'],16)
+    for trace in block_json["calls"]:
+        if trace["transactionHash"] == txHash:
+            gas_used = gas_used + int(trace["result"]["gasUsed"], 16)
     print(gas_used)
 
+
 def get_ether_flows(tx_traces, addresses_to_check):
     eth_inflow = 0
     eth_outflow = 0
     for trace in tx_traces:
-        if(trace['type'] == 'call'):
-            value = int(trace['action']['value'], 16) # converting from 0x prefix to decimal
+        if trace["type"] == "call":
+            value = int(
+                trace["action"]["value"], 16
+            )  # converting from 0x prefix to decimal
             # ETH_GET
-            if(trace['action']['callType'] != 'delegatecall' and trace['action']['from'] != weth_address and value > 0 and trace['action']['to'] in addresses_to_check):
+            if (
+                trace["action"]["callType"] != "delegatecall"
+                and trace["action"]["from"] != weth_address
+                and value > 0
+                and trace["action"]["to"] in addresses_to_check
+            ):
                 eth_inflow = eth_inflow + value
             # ETH_GIVE
-            if(trace['action']['callType'] != 'delegatecall' and trace['action']['to'] != weth_address and value > 0 and trace['action']['from'] in addresses_to_check):
+            if (
+                trace["action"]["callType"] != "delegatecall"
+                and trace["action"]["to"] != weth_address
+                and value > 0
+                and trace["action"]["from"] in addresses_to_check
+            ):
                 eth_outflow = eth_outflow + value
-            if(trace['action']['to'] == weth_address):
+            if trace["action"]["to"] == weth_address:
                 # WETH_GET1 & WETH_GET2 (to account for both 'transfer' and 'transferFrom' methods)
                 # WETH_GIVE1 & WETH_GIVE2
                 # transfer(address to,uint256 value) with args
-                if(len(trace['action']['input']) == 138):
-                    if(trace['action']['input'][2:10] == "a9059cbb"):
-                        transfer_to = '0x' + trace['action']['input'][34:74]
-                        transfer_value = int('0x' + trace['action']['input'][74:138], 16)
-                        if(transfer_to in addresses_to_check):
+                if len(trace["action"]["input"]) == 138:
+                    if trace["action"]["input"][2:10] == "a9059cbb":
+                        transfer_to = "0x" + trace["action"]["input"][34:74]
+                        transfer_value = int(
+                            "0x" + trace["action"]["input"][74:138], 16
+                        )
+                        if transfer_to in addresses_to_check:
                             eth_inflow = eth_inflow + transfer_value
-                        elif(trace['action']['from'] in addresses_to_check):
+                        elif trace["action"]["from"] in addresses_to_check:
                             eth_outflow = eth_outflow + transfer_value
                 # transferFrom(address from,address to,uint256 value )
-                if(len(trace['action']['input']) == 202):
-                    if(trace['action']['input'][2:10] == "23b872dd"):
-                        transfer_from = '0x' + trace['action']['input'][34:74]
-                        transfer_to = '0x' + trace['action']['input'][98:138]
-                        transfer_value = int('0x' + trace['action']['input'][138:202], 16)
-                        if(transfer_to in addresses_to_check):
+                if len(trace["action"]["input"]) == 202:
+                    if trace["action"]["input"][2:10] == "23b872dd":
+                        transfer_from = "0x" + trace["action"]["input"][34:74]
+                        transfer_to = "0x" + trace["action"]["input"][98:138]
+                        transfer_value = int(
+                            "0x" + trace["action"]["input"][138:202], 16
+                        )
+                        if transfer_to in addresses_to_check:
                             eth_inflow = eth_inflow + transfer_value
-                        elif(transfer_from in addresses_to_check):
+                        elif transfer_from in addresses_to_check:
                             eth_outflow = eth_outflow + transfer_value
-        
-        if(trace['type'] == 'suicide'):
-            if(trace['action']['refundAddress'] in addresses_to_check):
-                refund_value = int('0x' + trace['action']['balance'], 16)
+
+        if trace["type"] == "suicide":
+            if trace["action"]["refundAddress"] in addresses_to_check:
+                refund_value = int("0x" + trace["action"]["balance"], 16)
                 eth_inflow = eth_inflow + refund_value
     return [eth_inflow, eth_outflow]
 
+
 def get_dollar_flows(tx_traces, addresses_to_check):
     dollar_inflow = 0
     dollar_outflow = 0
     for trace in tx_traces:
-        if(trace['type'] == 'call' and is_stablecoin_address(trace['action']['to'])):
-            value = int(trace['action']['value'], 16) # converting from 0x prefix to decimal
+        if trace["type"] == "call" and is_stablecoin_address(trace["action"]["to"]):
+            _ = int(
+                trace["action"]["value"], 16
+            )  # converting from 0x prefix to decimal
 
             # USD_GET1 & USD_GET2 (to account for both 'transfer' and 'transferFrom' methods)
             # USD_GIVE1 & USD_GIVE2
 
             # transfer(address to,uint256 value) with args
-            if(len(trace['action']['input']) == 138):
-                if(trace['action']['input'][2:10] == "a9059cbb"):
-                    transfer_to = '0x' + trace['action']['input'][34:74]
-                    transfer_value = int('0x' + trace['action']['input'][74:138], 16)
-                    if(transfer_to in addresses_to_check):
+            if len(trace["action"]["input"]) == 138:
+                if trace["action"]["input"][2:10] == "a9059cbb":
+                    transfer_to = "0x" + trace["action"]["input"][34:74]
+                    transfer_value = int("0x" + trace["action"]["input"][74:138], 16)
+                    if transfer_to in addresses_to_check:
                         dollar_inflow = dollar_inflow + transfer_value
-                    elif(trace['action']['from'] in addresses_to_check):
+                    elif trace["action"]["from"] in addresses_to_check:
                         dollar_outflow = dollar_outflow + transfer_value
             # transferFrom(address from,address to,uint256 value )
-            if(len(trace['action']['input']) == 202):
-                if(trace['action']['input'][2:10] == "23b872dd"):
-                    transfer_from = '0x' + trace['action']['input'][34:74]
-                    transfer_to = '0x' + trace['action']['input'][98:138]
-                    transfer_value = int('0x' + trace['action']['input'][138:202], 16)
-                    if(transfer_to in addresses_to_check):
+            if len(trace["action"]["input"]) == 202:
+                if trace["action"]["input"][2:10] == "23b872dd":
+                    transfer_from = "0x" + trace["action"]["input"][34:74]
+                    transfer_to = "0x" + trace["action"]["input"][98:138]
+                    transfer_value = int("0x" + trace["action"]["input"][138:202], 16)
+                    if transfer_to in addresses_to_check:
                         dollar_inflow = dollar_inflow + transfer_value
-                    elif(transfer_from in addresses_to_check):
+                    elif transfer_from in addresses_to_check:
                         dollar_outflow = dollar_outflow + transfer_value
     return [dollar_inflow, dollar_outflow]
 
+
 def run_tokenflow(txHash, blockNo):
     tx_traces = get_tx_traces(txHash, blockNo)
     to_address = get_tx_to_address(txHash, blockNo)
@@ -194,27 +230,26 @@ def run_tokenflow(txHash, blockNo):
     proxies = get_tx_proxies(tx_traces, to_address)
 
     for proxy in proxies:
         addresses_to_check.append(proxy.lower())
-    
+
     # check if the 'to' field is a known aggregator/router
     # if not, add to relevant addresses to run TF on
-    if(not is_known_router_address(to_address)):
-        addresses_to_check.append(to_address.lower()) # traces need lowercase addresses to match
-    
+    if not is_known_router_address(to_address):
+        addresses_to_check.append(
+            to_address.lower()
+        )  # traces need lowercase addresses to match
+
     ether_flows = get_ether_flows(tx_traces, addresses_to_check)
     dollar_flows = get_dollar_flows(tx_traces, addresses_to_check)
     # print(addresses_to_check)
     # print('net eth flow', ether_flows[0] - ether_flows[1])
     # print('net dollar flow', dollar_flows )
-    return {
-        'ether_flows': ether_flows,
-        'dollar_flows': dollar_flows
-    }
+    return {"ether_flows": ether_flows, "dollar_flows": dollar_flows}
 
 
 # note: not the gas set by user, only gas consumed upon execution
-def get_gas_used_by_tx(txHash):
-    #tx_receipt = w3.eth.getTransactionReceipt(txHash)
-    return(tx_receipt['gasUsed'])
+# def get_gas_used_by_tx(txHash):
+#     # tx_receipt = w3.eth.getTransactionReceipt(txHash)
+#     return tx_receipt["gasUsed"]
 
 
 # tx_traces = get_tx_traces('0x4121ce805d33e952b2e6103a5024f70c118432fd0370128d6d7845f9b2987922', 11930296)
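The length checks and string slices in `get_ether_flows`/`get_dollar_flows` above decode raw calldata: `0x`, then 8 hex chars of selector, then one 64-hex-char (32-byte) word per argument. A worked, self-contained example with made-up values:

```python
# Hypothetical transfer(address,uint256) calldata, shaped like
# trace["action"]["input"]: "0x" + selector + 32-byte address word + 32-byte value word.
to_addr = "7a250d5630b4cf539739df2c5dacb4c659f2488d"  # made-up recipient (40 hex chars)
input_hex = "0xa9059cbb" + to_addr.rjust(64, "0") + hex(10**18)[2:].rjust(64, "0")

assert len(input_hex) == 138              # 2 + 8 + 64 + 64: the length checked above
assert input_hex[2:10] == "a9059cbb"      # selector of transfer(address,uint256)
transfer_to = "0x" + input_hex[34:74]     # last 40 hex chars of the address word
transfer_value = int("0x" + input_hex[74:138], 16)

assert transfer_to == "0x" + to_addr
assert transfer_value == 10**18           # 1 token with 18 decimals
# transferFrom calldata is 202 chars: from at [34:74], to at [98:138], value at [138:202].
```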
diff --git a/mev_inspect/utils.py b/mev_inspect/utils.py
index 2559918..f8785cc 100644
--- a/mev_inspect/utils.py
+++ b/mev_inspect/utils.py
@@ -1,9 +1,10 @@
 from hexbytes.main import HexBytes
 
+
 def check_call_for_signature(call, signatures):
-    if (call['action']['input'] == None):
+    if call["action"]["input"] == None:
         return False
-    
+
     ## By default set this to False
     signature_present_boolean = False
 
@@ -11,11 +12,11 @@ def check_call_for_signature(call, signatures):
     for signature in signatures:
         # print("Desired signature:", str(signature))
         # print("Actual", HexBytes(call['action']['input']))
-        
-        if HexBytes(call["action"]["input"]).startswith((signature)):
+
+        if HexBytes(call["action"]["input"]).startswith((signature)):
             ## Note that we are turning the input into hex bytes here, which seems to be fine
             ## Working with strings was doing weird things
             print("hit")
             signature_present_boolean = True
 
-    return signature_present_boolean
\ No newline at end of file
+    return signature_present_boolean
diff --git a/requirements_dev.txt b/requirements_dev.txt
new file mode 100644
index 0000000..e3f61f9
--- /dev/null
+++ b/requirements_dev.txt
@@ -0,0 +1,2 @@
+pre-commit==2.13.0
+pylint==2.9.3
diff --git a/testing_file.py b/testing_file.py
index 869b5f3..98a7675 100644
--- a/testing_file.py
+++ b/testing_file.py
@@ -1,16 +1,22 @@
 import argparse
 
-from web3.providers import base
 from web3 import Web3
 
 from mev_inspect import block
 from mev_inspect.inspector_uniswap import UniswapInspector
 from mev_inspect.processor import Processor
 
-parser = argparse.ArgumentParser(description='Inspect some blocks.')
-parser.add_argument('-block_number', metavar='b', type=int, nargs='+',
-                    help='the block number you are targetting, eventually this will need to be changed')
-parser.add_argument('-rpc', metavar='r', help='rpc endpoint, this needs to have parity style traces')
+parser = argparse.ArgumentParser(description="Inspect some blocks.")
+parser.add_argument(
+    "-block_number",
+    metavar="b",
+    type=int,
+    nargs="+",
+    help="the block number you are targeting, eventually this will need to be changed",
+)
+parser.add_argument(
+    "-rpc", metavar="r", help="rpc endpoint, this needs to have parity style traces"
+)
 args = parser.parse_args()
 
 ## Set up the base provider, but don't wrap it in web3 so we can make requests to it with make_request()
diff --git a/tests/liquidation_test.py b/tests/liquidation_test.py
index 0771c2b..9262e64 100644
--- a/tests/liquidation_test.py
+++ b/tests/liquidation_test.py
@@ -1,20 +1,23 @@
 import unittest
 
-from mev_inspect import inspector_compound
-from mev_inspect import inspector_aave
-
-class TestLiquidations (unittest.TestCase):
-    def test_compound_liquidation(self):
-        tx_hash = "0x0ec6d5044a47feb3ceb647bf7ea4ffc87d09244d629eeced82ba17ec66605012"
-        block_no = 11338848
-        res = inspector_compound.get_profit(tx_hash, block_no)
-        # self.assertEqual(res['profit'], 0)
-
-    def test_aave_liquidation(self):
-        tx_hash = "0xc8d2501d28800b1557eb64c5d0e08fd6070c15b6c04c39ca05631f641d19ffb2"
-        block_no = 10803840
-        res = inspector_aave.get_profit(tx_hash, block_no)
-        # self.assertEqual(res['profit'], 0)
+# Fails precommit because these inspectors don't exist yet
+# from mev_inspect import inspector_compound
+# from mev_inspect import inspector_aave
+#
+#
+# class TestLiquidations(unittest.TestCase):
+#     def test_compound_liquidation(self):
+#         tx_hash = "0x0ec6d5044a47feb3ceb647bf7ea4ffc87d09244d629eeced82ba17ec66605012"
+#         block_no = 11338848
+#         res = inspector_compound.get_profit(tx_hash, block_no)
+#         # self.assertEqual(res['profit'], 0)
+#
+#     def test_aave_liquidation(self):
+#         tx_hash = "0xc8d2501d28800b1557eb64c5d0e08fd6070c15b6c04c39ca05631f641d19ffb2"
+#         block_no = 10803840
+#         res = inspector_aave.get_profit(tx_hash, block_no)
+#         # self.assertEqual(res['profit'], 0)
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
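An alternative to commenting the liquidation tests out wholesale (a sketch, not what this patch does): guard the import and skip the class, so the tests stay compiled and are trivially re-enabled once the inspectors land:

```python
import unittest

try:
    from mev_inspect import inspector_compound  # doesn't exist yet
    HAVE_INSPECTORS = True
except ImportError:
    HAVE_INSPECTORS = False

@unittest.skipUnless(HAVE_INSPECTORS, "liquidation inspectors not implemented yet")
class TestLiquidations(unittest.TestCase):
    def test_compound_liquidation(self):
        tx_hash = "0x0ec6d5044a47feb3ceb647bf7ea4ffc87d09244d629eeced82ba17ec66605012"
        block_no = 11338848
        res = inspector_compound.get_profit(tx_hash, block_no)
        self.assertIsNotNone(res)
```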
diff --git a/tests/tokenflow_test.py b/tests/tokenflow_test.py
index 7a13ba1..97a257e 100644
--- a/tests/tokenflow_test.py
+++ b/tests/tokenflow_test.py
@@ -3,27 +3,28 @@ import unittest
 
 from mev_inspect import tokenflow
 
 
-class TestTokenFlow (unittest.TestCase):
+class TestTokenFlow(unittest.TestCase):
     def test_simple_arb(self):
         tx_hash = "0x4121ce805d33e952b2e6103a5024f70c118432fd0370128d6d7845f9b2987922"
         block_no = 11930296
         res = tokenflow.run_tokenflow(tx_hash, block_no)
-        self.assertEqual(res['ether_flows'], [3547869861992962562, 3499859860420296704])
-        self.assertEqual(res['dollar_flows'], [0,0])
-    
+        self.assertEqual(res["ether_flows"], [3547869861992962562, 3499859860420296704])
+        self.assertEqual(res["dollar_flows"], [0, 0])
+
     def test_arb_with_stable_flow(self):
         tx_hash = "0x496836e0bd1520388e36c79d587a31d4b3306e4f25352164178ca0667c7f9c29"
         block_no = 11935012
         res = tokenflow.run_tokenflow(tx_hash, block_no)
-        self.assertEqual(res['ether_flows'], [597044987302243493, 562445964778930176])
-        self.assertEqual(res['dollar_flows'], [871839781,871839781])
-    
+        self.assertEqual(res["ether_flows"], [597044987302243493, 562445964778930176])
+        self.assertEqual(res["dollar_flows"], [871839781, 871839781])
+
     def test_complex_cross_arb(self):
         tx_hash = "0x5ab21bfba50ad3993528c2828c63e311aafe93b40ee934790e545e150cb6ca73"
         block_no = 11931272
         res = tokenflow.run_tokenflow(tx_hash, block_no)
-        self.assertEqual(res['ether_flows'], [3636400213125714803, 3559576672903063566])
-        self.assertEqual(res['dollar_flows'], [0,0])
+        self.assertEqual(res["ether_flows"], [3636400213125714803, 3559576672903063566])
+        self.assertEqual(res["dollar_flows"], [0, 0])
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     unittest.main()