Skip to content

Commit

Permalink
fix testcase
Browse files Browse the repository at this point in the history
  • Loading branch information
prenner committed Jan 22, 2025
1 parent 99aba55 commit 83ebd79
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 13 deletions.
4 changes: 2 additions & 2 deletions python-threatexchange/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ RUN rm -rf /usr/src/threatexchange
ENV TX_STATEDIR=/var/lib/threatexchange

VOLUME ["/var/lib/threatexchange"]
CMD ["threatexchange"]
ENTRYPOINT ["/usr/local/bin/threatexchange"]
# CMD ["threatexchange"]
# ENTRYPOINT ["/usr/local/bin/threatexchange"]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
16 changes: 12 additions & 4 deletions python-threatexchange/threatexchange/exchanges/impl/ncmec_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ class NCMECCheckpoint(

# The biggest value of "to", and the next "from"
get_entries_max_ts: int
# A url to fetch the next page of results.
# A url to fetch the next page of results
# Only reference this value through get_paging_url_if_recent
paging_url: str
# a timestamp for the last fetch time, specifically used with a paging_url.
paging_url: str = ""
# a timestamp for the last fetch time, specifically used with a paging_url
# NCMEC suggests not storing paging_urls long term so we consider them invalid
# 12hr after the last_fetch_time
last_fetch_time: int
last_fetch_time: int = field(hash=True, default_factory=lambda: int(time.time()))

def get_progress_timestamp(self) -> t.Optional[int]:
return self.get_entries_max_ts
Expand All @@ -68,6 +68,14 @@ def __setstate__(self, d: t.Dict[str, t.Any]) -> None:
### field 'max_timestamp' renamed to 'get_entries_max_ts'
if "max_timestamp" in d:
d["get_entries_max_ts"] = d.pop("max_timestamp")

# 1.0.0 => 1.2.3:
# Add last_fetch_time
# Unpickling does not call __init__, so the dataclass default_factory
# never runs for older pickles; fill in the missing value here instead
if not "last_fetch_time" in d:
d["last_fetch_time"] = int(time.time())

self.__dict__ = d


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class SignalOpinionOwnerRemoved:
import copy
from dataclasses import dataclass, field
import pickle
import time
import typing as t

import pytest
Expand Down Expand Up @@ -148,7 +149,9 @@ def get_NCMECCheckpoint() -> t.Tuple[NCMECCheckpoint, t.Sequence[object]]:
max_ts = 1197433091

current = NCMECCheckpoint(
get_entries_max_ts=max_ts, paging_url="", last_fetch_time=0
get_entries_max_ts=max_ts,
paging_url="",
last_fetch_time=int(time.time()),
)

# 1.0.x
Expand Down Expand Up @@ -183,17 +186,21 @@ class NCMECCheckpointTsMoved(FetchCheckpointBase):


@pytest.mark.parametrize(
("current_version", "historical_versions"),
"get_checkpoint_func",
[
get_SignalOpinion(),
get_FBThreatExchangeOpinion(),
get_NCMECOpinion(),
get_NCMECCheckpoint(),
get_SignalOpinion,
get_FBThreatExchangeOpinion,
get_NCMECOpinion,
get_NCMECCheckpoint,
],
)
def test_previous_pickle_state(
current_version: object, historical_versions: t.Sequence[object]
get_checkpoint_func: t.Callable[[], t.Tuple[object, t.Sequence[object]]],
monkeypatch: pytest.MonkeyPatch,
):
monkeypatch.setattr("time.time", lambda: 10**8)

current_version, historical_versions = get_checkpoint_func()
# Sanity
serialized = pickle.dumps(current_version)
assert (
Expand Down

0 comments on commit 83ebd79

Please sign in to comment.