Skip to content

Commit

Permalink
move function to more sensible file, sample 5k rows instead of 10k
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisclark committed Jul 23, 2024
1 parent 12de482 commit 18d3c14
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 15 deletions.
2 changes: 1 addition & 1 deletion explorer/ee/db_connections/create_sqlite.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from io import BytesIO

from explorer.ee.db_connections.mime import get_parser
from explorer.ee.db_connections.type_infer import get_parser
from explorer.ee.db_connections.utils import pandas_to_sqlite


Expand Down
13 changes: 0 additions & 13 deletions explorer/ee/db_connections/mime.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,11 @@
import csv
import json
from explorer.ee.db_connections.type_infer import json_to_typed_df, json_list_to_typed_df, csv_to_typed_df

# These are 'shallow' checks. They are just to understand if the upload appears valid at surface-level.
# A deeper check will happen when pandas tries to parse the file.
# This is designed to be quick, and simply assigns the right (full) parsing function to the uploaded file.


def get_parser(file):
    """Return the parsing function that matches the uploaded file's type.

    The shallow type checks are tried in order: CSV, JSON list, JSON,
    then SQLite. The first check that passes decides the result.

    Args:
        file: The uploaded file object. It must expose a ``content_type``
            attribute, which is used in the error message below.

    Returns:
        ``csv_to_typed_df``, ``json_list_to_typed_df`` or
        ``json_to_typed_df`` for the matching file type, or ``None`` when
        the file is recognized as SQLite.

    Raises:
        ValueError: If none of the checks recognizes the file.
    """
    if is_csv(file):
        return csv_to_typed_df
    if is_json_list(file):
        return json_list_to_typed_df
    if is_json(file):
        return json_to_typed_df
    if is_sqlite(file):
        # NOTE(review): presumably a SQLite upload needs no conversion step,
        # hence no parser is returned -- confirm against the caller.
        return None
    raise ValueError(f"File {file.content_type} not supported.")


def is_csv(file):
if file.content_type != "text/csv":
return False
Expand Down
15 changes: 14 additions & 1 deletion explorer/ee/db_connections/type_infer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@
import io
import json
from explorer.ee.db_connections.mime import is_csv, is_json, is_sqlite, is_json_list


# Sample-size cap used during type inference. Both the old (10000) and the
# new (5000) value appear here; the later assignment is the one that takes
# effect.
MAX_TYPING_SAMPLE_SIZE = 10000
MAX_TYPING_SAMPLE_SIZE = 5000
# NOTE(review): presumably the minimum string length for a value to be
# considered a date candidate -- confirm against its usage in this module.
SHORTEST_PLAUSIBLE_DATE_STRING = 5


def get_parser(file):
    """Return the parsing function that matches the uploaded file's type.

    The shallow type checks are tried in order: CSV, JSON list, JSON,
    then SQLite. The first check that passes decides the result.

    Args:
        file: The uploaded file object. It must expose a ``content_type``
            attribute, which is used in the error message below.

    Returns:
        ``csv_to_typed_df``, ``json_list_to_typed_df`` or
        ``json_to_typed_df`` for the matching file type, or ``None`` when
        the file is recognized as SQLite.

    Raises:
        ValueError: If none of the checks recognizes the file.
    """
    if is_csv(file):
        return csv_to_typed_df
    if is_json_list(file):
        return json_list_to_typed_df
    if is_json(file):
        return json_to_typed_df
    if is_sqlite(file):
        # NOTE(review): presumably a SQLite upload needs no conversion step,
        # hence no parser is returned -- confirm against the caller.
        return None
    raise ValueError(f"File {file.content_type} not supported.")


def csv_to_typed_df(csv_bytes, delimiter=",", has_headers=True):
import pandas as pd
csv_file = io.BytesIO(csv_bytes)
Expand Down

0 comments on commit 18d3c14

Please sign in to comment.