From 93bdb7c129628b5a98710169cd52c963a2cdc2e3 Mon Sep 17 00:00:00 2001 From: Luke Van Seters Date: Mon, 3 Jan 2022 13:15:30 -0500 Subject: [PATCH 01/12] Write blocks as proof of concept --- mev_inspect/crud/blocks.py | 39 +++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/mev_inspect/crud/blocks.py b/mev_inspect/crud/blocks.py index fce9d2e..e8d7152 100644 --- a/mev_inspect/crud/blocks.py +++ b/mev_inspect/crud/blocks.py @@ -1,5 +1,6 @@ +import io from datetime import datetime -from typing import List +from typing import Any, List, Optional from mev_inspect.schemas.blocks import Block @@ -24,20 +25,32 @@ def delete_blocks( db_session.commit() +def clean_csv_value(value: Optional[Any]) -> str: + if value is None: + return r"\N" + return str(value).replace("\n", "\\n") + + def write_blocks( db_session, blocks: List[Block], ) -> None: - block_params = [ - { - "block_number": block.block_number, - "block_timestamp": datetime.fromtimestamp(block.block_timestamp), - } - for block in blocks - ] + csv_file_like_object = io.StringIO() + for block in blocks: + csv_file_like_object.write( + "|".join( + map( + clean_csv_value, + ( + block.block_number, + datetime.fromtimestamp(block.block_timestamp), + ), + ) + ) + + "\n" + ) - db_session.execute( - "INSERT INTO blocks (block_number, block_timestamp) VALUES (:block_number, :block_timestamp)", - params=block_params, - ) - db_session.commit() + csv_file_like_object.seek(0) + + with db_session.connection().connection.cursor() as cursor: + cursor.copy_from(csv_file_like_object, "blocks", sep="|") From bab2043575f89d2d7dc8a79b99390016501920d7 Mon Sep 17 00:00:00 2001 From: Luke Van Seters Date: Mon, 3 Jan 2022 13:38:34 -0500 Subject: [PATCH 02/12] Abstract out csv writing --- mev_inspect/crud/blocks.py | 35 +++++++++-------------------------- mev_inspect/db.py | 25 ++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/mev_inspect/crud/blocks.py b/mev_inspect/crud/blocks.py index e8d7152..41199a5 100644 --- a/mev_inspect/crud/blocks.py +++ b/mev_inspect/crud/blocks.py @@ -1,7 +1,7 @@ -import io from datetime import datetime -from typing import Any, List, Optional +from typing import List +from mev_inspect.db import write_as_csv from mev_inspect.schemas.blocks import Block @@ -25,32 +25,15 @@ def delete_blocks( db_session.commit() -def clean_csv_value(value: Optional[Any]) -> str: - if value is None: - return r"\N" - return str(value).replace("\n", "\\n") - - def write_blocks( db_session, blocks: List[Block], ) -> None: - csv_file_like_object = io.StringIO() - for block in blocks: - csv_file_like_object.write( - "|".join( - map( - clean_csv_value, - ( - block.block_number, - datetime.fromtimestamp(block.block_timestamp), - ), - ) - ) - + "\n" + items_generator = ( + ( + block.block_number, + datetime.fromtimestamp(block.block_timestamp), ) - - csv_file_like_object.seek(0) - - with db_session.connection().connection.cursor() as cursor: - cursor.copy_from(csv_file_like_object, "blocks", sep="|") + for block in blocks + ) + write_as_csv(db_session, "blocks", items_generator) diff --git a/mev_inspect/db.py b/mev_inspect/db.py index 15ccdc3..38f588e 100644 --- a/mev_inspect/db.py +++ b/mev_inspect/db.py @@ -1,5 +1,6 @@ +import io import os -from typing import Optional +from typing import Any, Iterable, Optional from sqlalchemy import create_engine, orm from sqlalchemy.orm import sessionmaker @@ -63,3 +64,25 @@ def get_trace_session() -> Optional[orm.Session]: return Session() return None + + +def _clean_csv_value(value: Optional[Any]) -> str: + if value is None: + return r"\N" + return str(value).replace("\n", "\\n") + + +def write_as_csv( + db_session, + table_name: str, + items: Iterable[Iterable[Any]], +) -> None: + csv_file_like_object = io.StringIO() + + for item in items: + csv_file_like_object.write("|".join(map(_clean_csv_value, item)) + "\n") + + csv_file_like_object.seek(0) + + with db_session.connection().connection.cursor() as cursor: + cursor.copy_from(csv_file_like_object, table_name, sep="|") From 6b1c469a108297c50ee58bef4e9a33c1cdcce76d Mon Sep 17 00:00:00 2001 From: Luke Van Seters Date: Mon, 3 Jan 2022 14:27:36 -0500 Subject: [PATCH 03/12] Move classified_traces to csv write --- mev_inspect/crud/traces.py | 64 ++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/mev_inspect/crud/traces.py b/mev_inspect/crud/traces.py index 0f099f6..6fe9004 100644 --- a/mev_inspect/crud/traces.py +++ b/mev_inspect/crud/traces.py @@ -1,6 +1,8 @@ import json -from typing import List +from datetime import datetime, timezone +from typing import Any, List +from mev_inspect.db import write_as_csv from mev_inspect.models.traces import ClassifiedTraceModel from mev_inspect.schemas.traces import ClassifiedTrace @@ -26,30 +28,40 @@ def write_classified_traces( db_session, classified_traces: List[ClassifiedTrace], ) -> None: - models = [] - for trace in classified_traces: - inputs_json = (json.loads(trace.json(include={"inputs"}))["inputs"],) - models.append( - ClassifiedTraceModel( - transaction_hash=trace.transaction_hash, - transaction_position=trace.transaction_position, - block_number=trace.block_number, - classification=trace.classification.value, - trace_type=trace.type.value, - trace_address=trace.trace_address, - protocol=str(trace.protocol), - abi_name=trace.abi_name, - function_name=trace.function_name, - function_signature=trace.function_signature, - inputs=inputs_json, - from_address=trace.from_address, - to_address=trace.to_address, - gas=trace.gas, - value=trace.value, - gas_used=trace.gas_used, - error=trace.error, - ) + now = datetime.now(timezone.utc) + items = ( + ( + now, # classified_at - gets a default + trace.transaction_hash, + trace.block_number, + trace.classification.value, + trace.type.value, + str(trace.protocol), + trace.abi_name, + trace.function_name, + trace.function_signature, + _inputs_as_json(trace), + trace.from_address, + trace.to_address, + trace.gas, + trace.value, + trace.gas_used, + trace.error, + _to_csv_list(trace.trace_address), + trace.transaction_position, ) + for trace in classified_traces + ) - db_session.bulk_save_objects(models) - db_session.commit() + write_as_csv(db_session, "classified_traces", items) + + +def _inputs_as_json(trace) -> str: + return json.dumps(json.loads(trace.json(include={"inputs"}))["inputs"]) + + +def _to_csv_list(values: List[Any]) -> str: + if len(values) == 0: + return "{}" + + return "{" + ",".join(map(str, values)) + "}" From ada540c1d4fd4a2ece807726c55a219e92ac213d Mon Sep 17 00:00:00 2001 From: Luke Van Seters Date: Mon, 3 Jan 2022 14:50:27 -0500 Subject: [PATCH 04/12] Write using an iterator --- mev_inspect/db.py | 14 ++++++-------- mev_inspect/string_io.py | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 8 deletions(-) create mode 100644 mev_inspect/string_io.py diff --git a/mev_inspect/db.py b/mev_inspect/db.py index 38f588e..8e4d57a 100644 --- a/mev_inspect/db.py +++ b/mev_inspect/db.py @@ -1,10 +1,11 @@ -import io import os from typing import Any, Iterable, Optional from sqlalchemy import create_engine, orm from sqlalchemy.orm import sessionmaker +from mev_inspect.string_io import StringIteratorIO + def get_trace_database_uri() -> Optional[str]: username = os.getenv("TRACE_DB_USER") @@ -77,12 +78,9 @@ def write_as_csv( table_name: str, items: Iterable[Iterable[Any]], ) -> None: - csv_file_like_object = io.StringIO() - - for item in items: - csv_file_like_object.write("|".join(map(_clean_csv_value, item)) + "\n") - - csv_file_like_object.seek(0) + csv_iterator = StringIteratorIO( + ("|".join(map(_clean_csv_value, item)) + "\n" for item in items) + ) with db_session.connection().connection.cursor() as cursor: - cursor.copy_from(csv_file_like_object, table_name, sep="|") + cursor.copy_from(csv_iterator, table_name, sep="|") diff --git a/mev_inspect/string_io.py b/mev_inspect/string_io.py new file mode 100644 index 0000000..1c0a40d --- /dev/null +++ b/mev_inspect/string_io.py @@ -0,0 +1,38 @@ +import io +from typing import Iterator, Optional + + +class StringIteratorIO(io.TextIOBase): + def __init__(self, iter: Iterator[str]): + self._iter = iter + self._buff = "" + + def readable(self) -> bool: + return True + + def _read1(self, n: Optional[int] = None) -> str: + while not self._buff: + try: + self._buff = next(self._iter) + except StopIteration: + break + ret = self._buff[:n] + self._buff = self._buff[len(ret) :] + return ret + + def read(self, n: Optional[int] = None) -> str: + line = [] + if n is None or n < 0: + while True: + m = self._read1() + if not m: + break + line.append(m) + else: + while n > 0: + m = self._read1(n) + if not m: + break + n -= len(m) + line.append(m) + return "".join(line) From 9b8cac5c5d54c4cc0341a5b711659c401f672a33 Mon Sep 17 00:00:00 2001 From: Luke Van Seters Date: Mon, 3 Jan 2022 15:14:28 -0500 Subject: [PATCH 05/12] Credit --- mev_inspect/string_io.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mev_inspect/string_io.py b/mev_inspect/string_io.py index 1c0a40d..37efb5f 100644 --- a/mev_inspect/string_io.py +++ b/mev_inspect/string_io.py @@ -1,3 +1,5 @@ +"""This is taken from https://hakibenita.com/fast-load-data-python-postgresql""" + import io from typing import Iterator, Optional From 0ed4f5456e7beaa9d3853da06820904e3f9d27bc Mon Sep 17 00:00:00 2001 From: Luke Van Seters Date: Mon, 3 Jan 2022 15:20:00 -0500 Subject: [PATCH 06/12] Move list util to db shared --- mev_inspect/crud/traces.py | 13 +++---------- mev_inspect/db.py | 21 ++++++++++++++------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/mev_inspect/crud/traces.py b/mev_inspect/crud/traces.py index 6fe9004..6fe3435 100644 --- a/mev_inspect/crud/traces.py +++ b/mev_inspect/crud/traces.py @@ -1,8 +1,8 @@ import json from datetime import datetime, timezone -from typing import Any, List +from typing import List -from mev_inspect.db import write_as_csv +from mev_inspect.db import to_postgres_list, write_as_csv from mev_inspect.models.traces import ClassifiedTraceModel from mev_inspect.schemas.traces import ClassifiedTrace @@ -47,7 +47,7 @@ def write_classified_traces( trace.value, trace.gas_used, trace.error, - _to_csv_list(trace.trace_address), + to_postgres_list(trace.trace_address), trace.transaction_position, ) for trace in classified_traces @@ -58,10 +58,3 @@ def write_classified_traces( def _inputs_as_json(trace) -> str: return json.dumps(json.loads(trace.json(include={"inputs"}))["inputs"]) - - -def _to_csv_list(values: List[Any]) -> str: - if len(values) == 0: - return "{}" - - return "{" + ",".join(map(str, values)) + "}" diff --git a/mev_inspect/db.py b/mev_inspect/db.py index 8e4d57a..dd7c66a 100644 --- a/mev_inspect/db.py +++ b/mev_inspect/db.py @@ -1,5 +1,5 @@ import os -from typing import Any, Iterable, Optional +from typing import Any, Iterable, List, Optional from sqlalchemy import create_engine, orm from sqlalchemy.orm import sessionmaker @@ -67,12 +67,6 @@ def get_trace_session() -> Optional[orm.Session]: return None -def _clean_csv_value(value: Optional[Any]) -> str: - if value is None: - return r"\N" - return str(value).replace("\n", "\\n") - - def write_as_csv( db_session, table_name: str, @@ -84,3 +78,16 @@ def write_as_csv( with db_session.connection().connection.cursor() as cursor: cursor.copy_from(csv_iterator, table_name, sep="|") + + +def _clean_csv_value(value: Optional[Any]) -> str: + if value is None: + return r"\N" + return str(value).replace("\n", "\\n") + + +def to_postgres_list(values: List[Any]) -> str: + if len(values) == 0: + return "{}" + + return "{" + ",".join(map(str, values)) + "}" From 24a6ba670e1d57f93419c172d16445c28bd88f7a Mon Sep 17 00:00:00 2001 From: Luke Van Seters Date: Tue, 4 Jan 2022 09:50:44 -0500 Subject: [PATCH 07/12] Bring back the array for diff checks --- mev_inspect/crud/traces.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mev_inspect/crud/traces.py b/mev_inspect/crud/traces.py index 6fe3435..45c0037 100644 --- a/mev_inspect/crud/traces.py +++ b/mev_inspect/crud/traces.py @@ -57,4 +57,6 @@ def write_classified_traces( def _inputs_as_json(trace) -> str: - return json.dumps(json.loads(trace.json(include={"inputs"}))["inputs"]) + inputs = json.dumps(json.loads(trace.json(include={"inputs"}))["inputs"]) + inputs_with_array = f"[{inputs}]" + return inputs_with_array From f84b9d45d321efb3f0962e84c2621da8a06d2ca3 Mon Sep 17 00:00:00 2001 From: Luke Van Seters Date: Tue, 4 Jan 2022 10:05:53 -0500 Subject: [PATCH 08/12] Add placeholder file to detect which code is running --- hello.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 hello.txt diff --git a/hello.txt b/hello.txt new file mode 100644 index 0000000..ce01362 --- /dev/null +++ b/hello.txt @@ -0,0 +1 @@ +hello From 02a0adc8e2a4bb3e20e23f21b030152212153faf Mon Sep 17 00:00:00 2001 From: Luke Van Seters Date: Tue, 4 Jan 2022 10:16:50 -0500 Subject: [PATCH 09/12] Break it to prove tests work --- mev_inspect/crud/traces.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mev_inspect/crud/traces.py b/mev_inspect/crud/traces.py index 45c0037..b7e1538 100644 --- a/mev_inspect/crud/traces.py +++ b/mev_inspect/crud/traces.py @@ -35,7 +35,8 @@ def write_classified_traces( trace.transaction_hash, trace.block_number, trace.classification.value, - trace.type.value, + 0, + # trace.type.value, str(trace.protocol), trace.abi_name, trace.function_name, From 28b37c723c0413d048e945edd59249fb7ac7b41a Mon Sep 17 00:00:00 2001 From: Luke Van Seters Date: Tue, 4 Jan 2022 10:19:39 -0500 Subject: [PATCH 10/12] Put it back --- mev_inspect/crud/traces.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mev_inspect/crud/traces.py b/mev_inspect/crud/traces.py index b7e1538..45c0037 100644 --- a/mev_inspect/crud/traces.py +++ b/mev_inspect/crud/traces.py @@ -35,8 +35,7 @@ def write_classified_traces( trace.transaction_hash, trace.block_number, trace.classification.value, - 0, - # trace.type.value, + trace.type.value, str(trace.protocol), trace.abi_name, trace.function_name, From eff77dd482df5696d77a074904c13599843f265f Mon Sep 17 00:00:00 2001 From: Luke Van Seters Date: Tue, 4 Jan 2022 11:24:33 -0500 Subject: [PATCH 11/12] goodbye --- hello.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 hello.txt diff --git a/hello.txt b/hello.txt deleted file mode 100644 index ce01362..0000000 --- a/hello.txt +++ /dev/null @@ -1 +0,0 @@ -hello From 17823b5aaec78df2a20976cba67612aff2ddc6c6 Mon Sep 17 00:00:00 2001 From: Luke Van Seters Date: Tue, 4 Jan 2022 11:25:27 -0500 Subject: [PATCH 12/12] comment => variable --- mev_inspect/crud/traces.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mev_inspect/crud/traces.py b/mev_inspect/crud/traces.py index 45c0037..903026e 100644 --- a/mev_inspect/crud/traces.py +++ b/mev_inspect/crud/traces.py @@ -28,10 +28,10 @@ def write_classified_traces( db_session, classified_traces: List[ClassifiedTrace], ) -> None: - now = datetime.now(timezone.utc) + classified_at = datetime.now(timezone.utc) items = ( ( - now, # classified_at - gets a default + classified_at, trace.transaction_hash, trace.block_number, trace.classification.value,