diff --git a/mev_inspect/arbitrages.py b/mev_inspect/arbitrages.py index 08c13f4..40529d7 100644 --- a/mev_inspect/arbitrages.py +++ b/mev_inspect/arbitrages.py @@ -1,5 +1,5 @@ from itertools import groupby -from typing import List, Optional +from typing import List, Tuple from mev_inspect.schemas.arbitrages import Arbitrage from mev_inspect.schemas.swaps import Swap @@ -23,70 +23,111 @@ def get_arbitrages(swaps: List[Swap]) -> List[Arbitrage]: def _get_arbitrages_from_swaps(swaps: List[Swap]) -> List[Arbitrage]: - pool_addresses = {swap.pool_address for swap in swaps} + """ + An arbitrage is defined as multiple swaps in a series that result in the initial token being returned + to the initial sender address. + + There are 2 types of arbitrages that are most common (99%+). + Case I (fully routed): + BOT -> A/B -> B/C -> C/A -> BOT + + Case II (always return to bot): + BOT -> A/B -> BOT -> B/C -> BOT -> A/C -> BOT + + There is only 1 correct way to route Case I, but for Case II the following valid routes could be found: + A->B->C->A / B->C->A->B / C->A->B->C. Thus when multiple valid routes are found we keep only those whose + first swap has a lower trace_address than their last swap. 
+ """ all_arbitrages = [] - for index, first_swap in enumerate(swaps): - other_swaps = swaps[:index] + swaps[index + 1 :] + start_ends = _get_all_start_end_swaps(swaps) + if len(start_ends) == 0: + return [] - if first_swap.from_address not in pool_addresses: - arbitrage = _get_arbitrage_starting_with_swap(first_swap, other_swaps) + # for (start, end) in filtered_start_ends: + for (start, end) in start_ends: + potential_intermediate_swaps = [ + swap for swap in swaps if swap is not start and swap is not end + ] + routes = _get_all_routes(start, end, potential_intermediate_swaps) - if arbitrage is not None: - all_arbitrages.append(arbitrage) - - return all_arbitrages - - -def _get_arbitrage_starting_with_swap( - start_swap: Swap, - other_swaps: List[Swap], -) -> Optional[Arbitrage]: - swap_path = [start_swap] - current_swap: Swap = start_swap - - while True: - next_swap = _get_swap_from_address( - current_swap.to_address, - current_swap.token_out_address, - other_swaps, - ) - - if next_swap is None: - return None - - swap_path.append(next_swap) - current_swap = next_swap - - if ( - current_swap.to_address == start_swap.from_address - and current_swap.token_out_address == start_swap.token_in_address - ): - - start_amount = start_swap.token_in_amount - end_amount = current_swap.token_out_amount + for route in routes: + start_amount = route[0].token_in_amount + end_amount = route[-1].token_out_amount profit_amount = end_amount - start_amount - return Arbitrage( - swaps=swap_path, - block_number=start_swap.block_number, - transaction_hash=start_swap.transaction_hash, - account_address=start_swap.from_address, - profit_token_address=start_swap.token_in_address, + arb = Arbitrage( + swaps=route, + block_number=route[0].block_number, + transaction_hash=route[0].transaction_hash, + account_address=route[0].from_address, + profit_token_address=route[0].token_in_address, start_amount=start_amount, end_amount=end_amount, profit_amount=profit_amount, ) - - return None + 
all_arbitrages.append(arb) + if len(all_arbitrages) == 1: + return all_arbitrages + else: + return [ + arb + for arb in all_arbitrages + if (arb.swaps[0].trace_address < arb.swaps[-1].trace_address) + ] -def _get_swap_from_address( - address: str, token_address: str, swaps: List[Swap] -) -> Optional[Swap]: - for swap in swaps: - if swap.pool_address == address and swap.token_in_address == token_address: - return swap +def _get_all_start_end_swaps(swaps: List[Swap]) -> List[Tuple[Swap, Swap]]: + """ + Gets the list of all possible opening and closing swap pairs in an arbitrage via + - swap[start].token_in == swap[end].token_out + - swap[start].from_address == swap[end].to_address + - not swap[start].from_address in all_pool_addresses + - not swap[end].to_address in all_pool_addresses (implied by the two conditions above, not checked separately) + """ + pool_addrs = [swap.pool_address for swap in swaps] + valid_start_ends: List[Tuple[Swap, Swap]] = [] + for potential_start_swap in swaps: + for potential_end_swap in swaps: + if ( + potential_start_swap.token_in_address + == potential_end_swap.token_out_address + and potential_start_swap.from_address == potential_end_swap.to_address + and not potential_start_swap.from_address in pool_addrs + ): + valid_start_ends.append((potential_start_swap, potential_end_swap)) + return valid_start_ends - return None + +def _get_all_routes( + start_swap: Swap, end_swap: Swap, other_swaps: List[Swap] +) -> List[List[Swap]]: + """ + Returns all routes (List[Swap]) from start_swap to end_swap. The final hop into end_swap is matched on token_out_address / token_in_address only; intermediate hops taken from other_swaps must additionally be linked to the previous swap by pool_address / from_address / to_address. 
+ """ + # If the path is complete, return + if start_swap.token_out_address == end_swap.token_in_address: + return [[start_swap, end_swap]] + elif len(other_swaps) == 0: + return [] + + # Collect all potential next steps, check if valid, recursively find routes from next_step to end_swap + routes: List[List[Swap]] = [] + for potential_next_swap in other_swaps: + if start_swap.token_out_address == potential_next_swap.token_in_address and ( + start_swap.pool_address == potential_next_swap.from_address + or start_swap.to_address == potential_next_swap.pool_address + or start_swap.to_address == potential_next_swap.from_address + ): + remaining_swaps = [ + swap for swap in other_swaps if swap != potential_next_swap + ] + next_swap_routes = _get_all_routes( + potential_next_swap, end_swap, remaining_swaps + ) + if len(next_swap_routes) > 0: + for next_swap_route in next_swap_routes: + next_swap_route.insert(0, start_swap) + routes.append(next_swap_route) + return routes diff --git a/tests/test_arbitrage_integration.py b/tests/test_arbitrage_integration.py index 8dc04ee..3f3a07f 100644 --- a/tests/test_arbitrage_integration.py +++ b/tests/test_arbitrage_integration.py @@ -15,12 +15,47 @@ def test_arbitrage_real_block(): assert len(swaps) == 51 arbitrages = get_arbitrages(list(swaps)) - assert len(arbitrages) == 1 + assert len(arbitrages) == 2 - arbitrage = arbitrages[0] + arbitrage_1 = [ + arb + for arb in arbitrages + if arb.transaction_hash + == "0x448245bf1a507b73516c4eeee01611927dada6610bf26d403012f2e66800d8f0" + ][0] + arbitrage_2 = [ + arb + for arb in arbitrages + if arb.transaction_hash + == "0xfcf4558f6432689ea57737fe63124a5ec39fd6ba6aaf198df13a825dd599bffc" + ][0] - assert len(arbitrage.swaps) == 3 + assert len(arbitrage_1.swaps) == 3 assert ( - arbitrage.profit_token_address == "0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2" + arbitrage_1.profit_token_address == "0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2" ) - assert arbitrage.profit_amount == 53560707941943273628 
+ assert len(arbitrage_1.swaps) == 3 + assert ( + arbitrage_1.swaps[1].token_in_address + == "0x25f8087ead173b73d6e8b84329989a8eea16cf73" + ) + assert ( + arbitrage_1.swaps[1].token_out_address + == "0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48" + ) + assert arbitrage_1.profit_amount == 750005273675102326 + + assert len(arbitrage_2.swaps) == 3 + assert ( + arbitrage_2.profit_token_address == "0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2" + ) + assert len(arbitrage_2.swaps) == 3 + assert ( + arbitrage_2.swaps[1].token_in_address + == "0x25f8087ead173b73d6e8b84329989a8eea16cf73" + ) + assert ( + arbitrage_2.swaps[1].token_out_address + == "0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48" + ) + assert arbitrage_2.profit_amount == 53560707941943273628 diff --git a/tests/test_arbitrages.py b/tests/test_arbitrages.py index 524aa7f..5ac6d52 100644 --- a/tests/test_arbitrages.py +++ b/tests/test_arbitrages.py @@ -1,9 +1,11 @@ -from mev_inspect.arbitrages import get_arbitrages +from typing import List + +from mev_inspect.arbitrages import get_arbitrages, _get_all_routes +from mev_inspect.schemas.swaps import Swap from mev_inspect.classifiers.specs.uniswap import ( UNISWAP_V2_PAIR_ABI_NAME, UNISWAP_V3_POOL_ABI_NAME, ) -from mev_inspect.schemas.swaps import Swap def test_two_pool_arbitrage(get_transaction_hashes, get_addresses): @@ -17,10 +19,11 @@ def test_two_pool_arbitrage(get_transaction_hashes, get_addresses): unrelated_pool_address, first_token_address, second_token_address, - ] = get_addresses(6) + third_token_address, + ] = get_addresses(7) first_token_in_amount = 10 - first_token_out_amount = 10 + first_token_out_amount = 11 second_token_amount = 15 arb_swaps = [ @@ -62,7 +65,7 @@ def test_two_pool_arbitrage(get_transaction_hashes, get_addresses): to_address=account_address, token_in_address=second_token_address, token_in_amount=first_token_in_amount, - token_out_address=first_token_address, + token_out_address=third_token_address, token_out_amount=first_token_out_amount, ) 
@@ -100,7 +103,7 @@ def test_three_pool_arbitrage(get_transaction_hashes, get_addresses): ] = get_addresses(7) first_token_in_amount = 10 - first_token_out_amount = 10 + first_token_out_amount = 11 second_token_amount = 15 third_token_amount = 40 @@ -158,3 +161,70 @@ def test_three_pool_arbitrage(get_transaction_hashes, get_addresses): assert arbitrage.start_amount == first_token_in_amount assert arbitrage.end_amount == first_token_out_amount assert arbitrage.profit_amount == first_token_out_amount - first_token_in_amount + + +def test_get_all_routes(): + # A -> B, B -> A + start_swap = create_generic_swap("0xa", "0xb") + end_swap = create_generic_swap("0xb", "0xa") + routes = _get_all_routes(start_swap, end_swap, []) + assert len(routes) == 1 + + # A->B, B->C, C->A + start_swap = create_generic_swap("0xa", "0xb") + other_swaps = [create_generic_swap("0xb", "0xc")] + end_swap = create_generic_swap("0xc", "0xa") + routes = _get_all_routes(start_swap, end_swap, other_swaps) + assert len(routes) == 1 + + # A->B, B->C, C->A + A->D + other_swaps.append(create_generic_swap("0xa", "0xd")) + routes = _get_all_routes(start_swap, end_swap, other_swaps) + assert len(routes) == 1 + + # A->B, B->C, C->A + A->D B->E + other_swaps.append(create_generic_swap("0xb", "0xe")) + routes = _get_all_routes(start_swap, end_swap, other_swaps) + assert len(routes) == 1 + + # A->B, B->A, B->C, C->A + other_swaps = [create_generic_swap("0xb", "0xa"), create_generic_swap("0xb", "0xc")] + routes = _get_all_routes(start_swap, end_swap, other_swaps) + assert len(routes) == 1 + expect_simple_route = [["0xa", "0xb"], ["0xb", "0xc"], ["0xc", "0xa"]] + assert len(routes[0]) == len(expect_simple_route) + for i in range(len(expect_simple_route)): + assert expect_simple_route[i][0] == routes[0][i].token_in_address + assert expect_simple_route[i][1] == routes[0][i].token_out_address + + # A->B, B->C, C->D, D->A, B->D + end_swap = create_generic_swap("0xd", "0xa") + other_swaps = [ + 
create_generic_swap("0xb", "0xc"), + create_generic_swap("0xc", "0xd"), + create_generic_swap("0xb", "0xd"), + ] + routes = _get_all_routes(start_swap, end_swap, other_swaps) + assert len(routes) == 2 + + +def create_generic_swap( + tok_a: str = "0xa", + tok_b: str = "0xb", + amount_a_in: int = 1, + amount_b_out: int = 1, + trace_address: List[int] = [], +): + return Swap( + abi_name=UNISWAP_V3_POOL_ABI_NAME, + transaction_hash="0xfake", + block_number=0, + trace_address=trace_address, + pool_address="0xfake", + from_address="0xfake", + to_address="0xfake", + token_in_address=tok_a, + token_in_amount=amount_a_in, + token_out_address=tok_b, + token_out_amount=amount_b_out, + )