Skip to content

Commit

Permalink
rough prototype that would be compatible with alpha pyterrier artifacts
Browse files Browse the repository at this point in the history
  • Loading branch information
mam10eks committed Aug 29, 2024
1 parent dcd73f2 commit 1a5fe8d
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 0 deletions.
6 changes: 6 additions & 0 deletions python-client/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ console_scripts =
tira-run-notebook = tira.tira_run_notebook:main
tira-run-inference-server = tira.tira_run_inference_server:main

pyterrier.artifact.url_protocol_resolver =
tira = tira.pyterrier_integration:pt_artifact_entrypoint

pyterrier.artifact =
tira.pt_transformer = tira.pyterrier_integration:pt_transformer

[options.package_data]
tira.static_redirects = *.json

Expand Down
35 changes: 35 additions & 0 deletions python-client/tira/pyterrier_integration.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from pathlib import Path
from tira.tirex import IRDS_TO_TIREX_DATASET


class PyTerrierIntegration():
Expand Down Expand Up @@ -245,3 +246,37 @@ def splade_index(self, dataset:str, approach: str='workshop-on-open-web-search/n
import pyterrier as pt
ret = Path(self.tira_client.get_run_output('ir-lab-sose-2024/naverlabseurope/Splade (Index)', translate_irds_id_to_tirex(dataset))) / 'spladeindex'
return pt.IndexFactory.of(os.path.abspath(ret))


def pt_transformer(path):
    """Load the run stored below an artifact directory as a PyTerrier transformer.

    PyTerrier is started via ``pt.init()`` if ``pt.started()`` reports that it
    is not running yet.

    Args:
        path: Directory of the artifact, given as ``str`` or ``os.PathLike``.
            The run file is expected at ``output/run.txt`` below it.

    Returns:
        The result of ``pt.transformer.get_transformer`` applied to the run
        read with ``pt.io.read_results``.
    """
    import pyterrier as pt

    if not pt.started():
        pt.init()

    # TODO hacked for the moment, in reality, we must delegate to the classes above.
    # os.fspath lets callers pass pathlib.Path objects as well as plain strings;
    # plain string concatenation would raise a TypeError for them.
    run_file = os.path.join(os.fspath(path), 'output', 'run.txt')
    return pt.transformer.get_transformer(pt.io.read_results(run_file))

def pt_artifact_entrypoint(url):
    """Resolve a parsed artifact URL to a local directory with the run output.

    The URL's ``netloc`` and ``path`` are concatenated and matched against the
    keys of ``IRDS_TO_TIREX_DATASET``. The first key that is a prefix selects
    the TIRA dataset, and that prefix is replaced by ``ir-benchmarks`` to form
    the approach identifier passed to ``Client.get_run_output``.

    Args:
        url: Parsed URL object exposing ``netloc`` and ``path`` attributes
            (presumably a ``urllib.parse.ParseResult`` -- confirm against the
            caller).

    Returns:
        Absolute path (``str``) of the parent directory of the run output. A
        ``pt_meta.json`` marker file is created there if it does not exist.

    Raises:
        ValueError: If no key of ``IRDS_TO_TIREX_DATASET`` is a prefix of the
            URL.
    """
    url = url.netloc + url.path
    # Must be a plain None: a tuple such as (None, None) is truthy and would
    # silently disable the "not found" check below.
    dataset_id = None

    for irds_id, tira_dataset_id in IRDS_TO_TIREX_DATASET.items():
        if url.startswith(irds_id):
            dataset_id = tira_dataset_id
            # Swap only the matched prefix; str.replace would also rewrite
            # later occurrences of the ID inside the approach name.
            url = 'ir-benchmarks' + url[len(irds_id):]
            break
    if not dataset_id:
        raise ValueError(
            f'Can not resolve the artifact url "{url}": no dataset in '
            'IRDS_TO_TIREX_DATASET is a prefix of it.'
        )

    from tira.rest_api_client import Client
    from pathlib import Path
    import json

    tira = Client()
    ret = Path(tira.get_run_output(url, dataset_id)).parent

    # Marker file describing the artifact type/format; written only once.
    meta_file = ret / 'pt_meta.json'
    if not meta_file.is_file():
        with open(meta_file, 'w', encoding='utf-8') as f:
            json.dump({"type": "tira", "format": "pt_transformer"}, f)
    return str(ret.absolute())

0 comments on commit 1a5fe8d

Please sign in to comment.