move function to more sensible file, sample 5k rows instead of 10k

explorerhq · Jul 23, 2024 · 18d3c14 · 18d3c14
1 parent 12de482
commit 18d3c14
Show file tree

Hide file tree

Showing 3 changed files with 15 additions and 15 deletions.
diff --git a/explorer/ee/db_connections/create_sqlite.py b/explorer/ee/db_connections/create_sqlite.py
@@ -1,7 +1,7 @@
 import os
 from io import BytesIO
 
-from explorer.ee.db_connections.mime import get_parser
+from explorer.ee.db_connections.type_infer import get_parser
 from explorer.ee.db_connections.utils import pandas_to_sqlite
 
 

diff --git a/explorer/ee/db_connections/mime.py b/explorer/ee/db_connections/mime.py
@@ -1,24 +1,11 @@
 import csv
 import json
-from explorer.ee.db_connections.type_infer import json_to_typed_df, json_list_to_typed_df, csv_to_typed_df
 
 # These are 'shallow' checks. They are just to understand if the upload appears valid at surface-level.
 # A deeper check will happen when pandas tries to parse the file.
 # This is designed to be quick, and simply assigns the right (full) parsing function to the uploaded file.
 
 
-def get_parser(file):
-    if is_csv(file):
-        return csv_to_typed_df
-    if is_json_list(file):
-        return json_list_to_typed_df
-    if is_json(file):
-        return json_to_typed_df
-    if is_sqlite(file):
-        return None
-    raise ValueError(f"File {file.content_type} not supported.")
-
-
 def is_csv(file):
     if file.content_type != "text/csv":
         return False

diff --git a/explorer/ee/db_connections/type_infer.py b/explorer/ee/db_connections/type_infer.py
@@ -1,11 +1,24 @@
 import io
 import json
+from explorer.ee.db_connections.mime import is_csv, is_json, is_sqlite, is_json_list
 
 
-MAX_TYPING_SAMPLE_SIZE = 10000
+MAX_TYPING_SAMPLE_SIZE = 5000
 SHORTEST_PLAUSIBLE_DATE_STRING = 5
 
 
+def get_parser(file):
+    if is_csv(file):
+        return csv_to_typed_df
+    if is_json_list(file):
+        return json_list_to_typed_df
+    if is_json(file):
+        return json_to_typed_df
+    if is_sqlite(file):
+        return None
+    raise ValueError(f"File {file.content_type} not supported.")
+
+
 def csv_to_typed_df(csv_bytes, delimiter=",", has_headers=True):
     import pandas as pd
     csv_file = io.BytesIO(csv_bytes)