Skip to content

Commit

Permalink
Support multiple possible street names, try all and select the best s…
Browse files Browse the repository at this point in the history
…treet name candidat
  • Loading branch information
frodrigo committed Jan 2, 2017
1 parent a0d468f commit c00f1ff
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 43 deletions.
22 changes: 19 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Rue Beau Site 35610 Sains

Use as normal Addok search query, with required `q` and other `limit`, `autocomplete`, etc, including filters.

Search2steps introduce a new required parameter: `q0` for preliminary search.
Search2steps introduces a new parameter: `q0`, used for the preliminary search.

```
http://localhost:7878/search2steps?q0=Brest&q=Rue+du+Restic&limit=5
Expand Down Expand Up @@ -57,11 +57,12 @@ API_ENDPOINTS = [

Configure the plugin:
```python
SEARCH_2_STEPS_STEP1_TYPE = 'city'
SEARCH_2_STEPS_STEP1_TYPES = ['city', 'locality']
SEARCH_2_STEPS_STEP1_THRESHOLD = 0.5
SEARCH_2_STEPS_STEP1_LIMIT = 10
SEARCH_2_STEPS_PIVOT_FILTER = 'citycode'
SEARCH_2_STEPS_PIVOT_REWRITE = 'city'
SEARCH_2_STEPS_STEP2_TYPE = 'housenumber'
SEARCH_2_STEPS_STEP2_THRESHOLD = 0.2
```

Expand All @@ -74,11 +75,12 @@ Search in addok in two steps by:
### Step one
The configuration must specify the types of objects looked for in this step; they are used as the `type` filter in step one.
```python
SEARCH_2_STEPS_STEP1_TYPE = 'city'
SEARCH_2_STEPS_STEP1_TYPES = ['city', 'locality']
```
Only results with a score above the threshold, up to this limit, remain available for the next step:
```python
SEARCH_2_STEPS_STEP1_THRESHOLD = 0.5
SEARCH_2_STEPS_STEP2_TYPE = 'housenumber'
SEARCH_2_STEPS_STEP1_LIMIT = 10
```

Expand Down Expand Up @@ -125,3 +127,17 @@ query:
q="Bordeaux-Saint-Clair Rue des lilas" citycode=76117 limit 1
q="Lignan-de-Bordeaux Rue des lilas" citycode=33245 limit 1
```

## Bonus
The street can be a list of possible candidates when it comes from a loosely structured address.
```
street? App 6
street? Rue Beau Rosier
postcode: 33000
city: Bordeaux
```

Separate possible alternate street names with a pipe `|`:
```
q0=33000+Bordeaux&q=App+6|Rue+Beau+Rosier
```
128 changes: 88 additions & 40 deletions search2steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,36 @@
from werkzeug.exceptions import BadRequest
from addok.server import View, BaseCSV, log_query, log_notfound
from addok.core import search
import itertools

def multiple_search(queries, **args):
    """Run ``search`` on every candidate query and keep the best result list.

    Each query in ``queries`` is passed to ``search`` together with ``args``.
    The result lists are ranked by the score of their first entry, and the
    highest-ranked list is returned. On ties the earliest query wins.

    :param queries: iterable of alternative query strings.
    :param args: keyword arguments forwarded unchanged to ``search``.
    :return: the result list whose first entry has the highest score, or an
        empty list when there are no queries or no search produced results.
    """
    def top_score(results):
        # A missing/empty result list, or a falsy score, ranks as 0.
        return (results[0].score or 0) if results else 0

    # ``default`` covers an empty ``queries`` without needing ``len()``, so
    # any iterable (not only a list) is accepted.
    best = max(
        (search(query, **args) for query in queries),
        key=top_score,
        default=[],
    )
    # Always hand back something iterable, even if the winning search
    # returned None.
    return best or []

def search2steps_step1(config, query1, limit, **filters):
    """Run the preliminary (step one) search over every configured type.

    ``search`` is called once per entry of
    ``config.SEARCH_2_STEPS_STEP1_TYPES``, with that entry set as the
    ``type`` filter and autocomplete disabled. The per-type results are
    merged, ordered by descending score and truncated to ``limit``.

    :param config: object exposing ``SEARCH_2_STEPS_STEP1_TYPES``.
    :param query1: query string for the preliminary search.
    :param limit: maximum number of results requested from each per-type
        search and kept after merging.
    :param filters: extra filters forwarded to ``search``.
    :return: list of at most ``limit`` results, best score first.
    """
    # Work on a copy so the ``type`` override is not written into ``filters``.
    filters_step_1 = filters.copy()
    ret = []
    for step1_type in config.SEARCH_2_STEPS_STEP1_TYPES:
        filters_step_1['type'] = step1_type
        ret += search(query1, limit=limit, autocomplete=False, **filters_step_1)
    # Each per-type search may return up to ``limit`` rows: re-rank the
    # merged list and cut it back to ``limit``.
    return sorted(ret, key=lambda k: k.score, reverse=True)[0:limit]

def search2steps(config, query1, query2, autocomplete, limit, **filters):
def search2steps(config, query1, queries2, autocomplete, limit, **filters):
# Fetch the join value
join_value = threshold = results = None

# Run step 1 query
results = search2steps_step1(config, query1, config.SEARCH_2_STEPS_STEP1_LIMIT, **filters)
if not query2:
return results[0:limit]
results1 = search2steps_step1(config, query1, config.SEARCH_2_STEPS_STEP1_LIMIT, **filters)
if len(queries2) == 0:
return results1[0:limit]

ret = []
if results:
if results1:
params_steps_2 = []
# Collect step 1 results
for result in results:
for result in results1:
query_step_1 = result.__getattr__(config.SEARCH_2_STEPS_PIVOT_REWRITE)

if config.SEARCH_2_STEPS_PIVOT_FILTER in filters and filters[config.SEARCH_2_STEPS_PIVOT_FILTER]:
Expand All @@ -42,7 +52,8 @@ def search2steps(config, query1, query2, autocomplete, limit, **filters):
# Set step 2 query filter from step 1 result
filters_step_2 = filters.copy()
filters_step_2[config.SEARCH_2_STEPS_PIVOT_FILTER] = join_value
results_step_2 = search(query2 + ' ' + query_step_1, limit=limit, autocomplete=autocomplete, **filters_step_2)
filters_step_2['type'] = config.SEARCH_2_STEPS_STEP2_TYPE
results_step_2 = multiple_search([q + ' ' + query_step_1 for q in queries2], limit=limit, autocomplete=autocomplete, **filters_step_2)
append = False
if results_step_2:
for result_step_2 in results_step_2:
Expand All @@ -56,8 +67,8 @@ def search2steps(config, query1, query2, autocomplete, limit, **filters):
if result.score > config.SEARCH_2_STEPS_STEP2_THRESHOLD:
ret.append(result)

results = search(query2 + ' ' + query1, limit=limit, autocomplete=autocomplete, **filters)
for result in results:
results_full = multiple_search([q + ' ' + query1 for q in queries2], limit=limit, autocomplete=autocomplete, **filters)
for result in results_full:
# Lower the score
result.score *= config.SEARCH_2_STEPS_STEP1_THRESHOLD

Expand All @@ -75,17 +86,20 @@ def search2steps(config, query1, query2, autocomplete, limit, **filters):
ids.append(e.id)
return uniq
else:
return []
return results1[0:limit]

class Search2Steps(View):

endpoint = 'search2steps'

def get(self):
q0 = self.request.args.get('q0', '')
if not q0:
return Response('Missing query part one (q0=)', status=400)
q = self.request.args.get('q', '')
q0 = self.request.args.get('q0')
q0 = q0.split('|') if q0 else []
q = self.request.args.get('q')
q = q.split('|') if q else []
if not q and not q0:
return Response('Missing query', status=400)

try:
limit = int(self.request.args.get('limit'))
except (ValueError, TypeError):
Expand All @@ -97,16 +111,29 @@ def get(self):
try:
lat = float(self.request.args.get('lat'))
lon = float(self.request.args.get('lon',
self.request.args.get('lng')))
self.request.args.get('lng',
self.request.args.get('long'))))
center = [lat, lon]
except (ValueError, TypeError):
lat = None
lon = None
center = None
filters = self.match_filters()
results = search2steps(self.config, q0, q, autocomplete=autocomplete, limit=limit, lat=lat, lon=lon, **filters)
if not results:
log_notfound(q0 + ' ' + q)
log_query(q0 + ' ' + q, results)
return self.to_geojson(results, query=q0 + ' ' + q)

if len(q0) == 0:
results = multiple_search(q, limit=limit, autocomplete=False, lat=lat, lon=lon, **filters)
query = '|'.join(q)
if not results:
log_notfound(query)
log_query(query, results)
return self.to_geojson(results, query=query, filters=filters, center=center, limit=limit)
else:
results = search2steps(self.config, q0[0], q, autocomplete=autocomplete, limit=limit, lat=lat, lon=lon, **filters)
query = '|'.join(q0) + ' ' + ('|').join(q)
if not results:
log_notfound(query)
log_query(query, results)
return self.to_geojson(results, query=query, filters=filters, center=center, limit=limit)

class CSVSearch2steps(BaseCSV):

Expand All @@ -122,9 +149,11 @@ def compute_fieldnames(self):
raise BadRequest("Cannot found column '{}' in columns {}".format(column, self.fieldnames))

def process_row(self, row):
row_split = dict([(k, v and v.split('|')) for k, v in row.items()])
# Generate all combinations
# We don't want None in a join.
q0 = ' '.join([row[k] or '' for k in self.columns0])
q = ' '.join([row[k] or '' for k in self.columns])
q0 = list(filter(lambda x: x and x != '', [' '.join([l or '' for l in i]) for i in itertools.product(*[row_split[k] or [None] for k in self.columns0])]))
q = list(filter(lambda x: x and x != '', [' '.join([l or '' for l in i]) for i in itertools.product(*[row_split[k] or [None] for k in self.columns])]))
filters = self.match_row_filters(row)
lat_column = self.request.form.get('lat')
lon_column = self.request.form.get('lon')
Expand All @@ -134,20 +163,39 @@ def process_row(self, row):
if lat and lon:
filters['lat'] = float(lat)
filters['lon'] = float(lon)
results = search2steps(self.config, q0, q, autocomplete=False, limit=1, **filters)
log_query(q0 + ' ' + q, results)
if results:
result = results[0]
row.update({
'latitude': result.lat,
'longitude': result.lon,
'result_label': str(result),
'result_score': round(result.score, 2),
'result_type': result.type,
'result_id': result.id,
'result_housenumber': result.housenumber,
'result_citycode': result.citycode,
})
self.add_fields(row, result)
if len(q0) == 0:
results = multiple_search(q, autocomplete=False, limit=1, **filters)
log_query('|'.join(q), results)
if results:
result = results[0]
row.update({
'latitude': result.lat,
'longitude': result.lon,
'result_label': str(result),
'result_score': round(result.score, 2),
'result_type': result.type,
'result_id': result.id,
'result_housenumber': result.housenumber,
'result_citycode': result.citycode,
})
self.add_fields(row, result)
else:
log_notfound('|'.join(q))
else:
log_notfound(q0 + ' ' + q)
results = search2steps(self.config, q0[0], q, autocomplete=False, limit=1, **filters)
log_query('|'.join(q0) + ' ' + ('|').join(q), results)
if results:
result = results[0]
row.update({
'latitude': result.lat,
'longitude': result.lon,
'result_label': str(result),
'result_score': round(result.score, 2),
'result_type': result.type,
'result_id': result.id,
'result_housenumber': result.housenumber,
'result_citycode': result.citycode,
})
self.add_fields(row, result)
else:
log_notfound('|'.join(q0) + ' ' + ('|').join(q))

0 comments on commit c00f1ff

Please sign in to comment.