Geo Spatial Indexing with Sphinx

by lih on 05 November 2011 - 01:02am in
  • Installation

    • Go to http://sphinxsearch.com/downloads/ and download the latest release.
    • Follow the instructions described on that page. In a nutshell, you run ./configure, then make, then sudo make install.
  • Configure your sphinx.conf

    • Go to /etc/sphinxsearch/ and make a new file sphinx.conf.
  • Add the following to your sphinx.conf. Modify database username, password, databasename, the sql statement, etc., but feel free to use the following as a template.

source hostel_geo
{
# data source type. mandatory, no default value
type = mysql
sql_host = 127.0.0.1
sql_user = root
sql_pass =
sql_db = PackerShack
sql_port = 3306

# UNIX socket name
# optional, default is empty (reuse client library defaults)
# usually '/var/lib/mysql/mysql.sock' on Linux
# usually '/tmp/mysql.sock' on FreeBSD
#
# sql_sock = /tmp/mysql.sock


# MySQL specific client connection flags
# optional, default is 0
#
# mysql_connect_flags = 32 # enable compression


# pre-query, executed before the main fetch query
# multi-value, optional, default is empty list of queries
#
sql_query_pre = SET NAMES utf8
sql_query_pre = SET SESSION query_cache_type=off

# main document fetch query
# mandatory, integer document ID field MUST be the first selected column
#
# lat/lng are converted to radians here so the index stores them in radians.
# Both are declared as attributes below, so an empty 'placeholder' column is
# selected as well: Sphinx needs at least one non-attribute (full-text) field
# in the schema. Replace it with a real text column (e.g. a name) if you
# also want keyword search.
sql_query = \
    SELECT id, '' AS placeholder, radians(lat) AS lat, radians(lng) AS lng FROM hostel


# floating point attribute declaration
# multi-value (an arbitrary number of attributes is allowed), optional
# values are stored in single precision, 32-bit IEEE 754 format
#
sql_attr_float = lat
sql_attr_float = lng

# ranged query throttling, in milliseconds
# optional, default is 0 which means no delay
# enforces given delay before each query step
sql_ranged_throttle = 0


# document info query, ONLY for CLI search (ie. testing and debugging)
# optional, default is empty
# must contain $id macro and must fetch the document by that id
sql_query_info = SELECT * FROM hostel WHERE id=$id
}

index hostel_geo
{
# source(s) feeding this index (mandatory, may be repeated)
# document IDs have to be globally unique across every source listed
source = hostel_geo

# base path and file name of the index files (mandatory)
# no extension here: extensions are appended automatically
# the path must be writable
path = /var/lib/sphinxsearch/data/hostel_geo

# storage mode for document attribute values (docinfo)
# one of 'none', 'extern', 'inline'; optional, 'extern' is the default
docinfo = extern

# lock cached data (.spa and .spi) in memory to prevent swapping
# optional, 0 (the default) means do not mlock
# locking requires searchd to be run from root
mlock = 0

# morphology preprocessors to apply; optional, empty by default
morphology = none

# shortest word length that gets indexed
# 1 (the default) indexes everything
min_word_len = 1

# charset encoding type
# either 'sbcs' (Single Byte CharSet) or 'utf-8'; optional, default is 'sbcs'
charset_type = utf-8

# charset definition and case folding rules "table"
# optional; when omitted, the default depends on charset_type
#
# the defaults only include English and Russian characters,
# so extend the table if you need additional ones
# (this behavior MAY change in future versions)
#
# default table for 'sbcs':
# charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
#
# default table for 'utf-8', spelled out explicitly here:
charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F

# list of characters to ignore; optional, empty by default
#
# ignore_chars = U+00AD

# HTML tag stripping for incoming documents
# 0 = do not strip (default), 1 = do strip
html_strip = 0

# star-syntax (wildcards) when searching prefix/infix indexes
# 0 = do not use wildcard syntax (default), 1 = enabled
enable_star = 0
}


indexer
{
# memory limit for the indexer (32 megabytes here)
mem_limit = 32M
}


searchd
{
# port searchd accepts client connections on
# ('listen' replaces the deprecated 'port' directive; same port, default protocol)
listen = 3312

# daemon log and per-query log files
log = /var/log/sphinxsearch/searchd.log
query_log = /var/log/sphinxsearch/query.log

# client read timeout, in seconds
read_timeout = 5

# maximum number of children to fork (concurrent searches)
max_children = 30

# PID file written by searchd
pid_file = /var/run/searchd.pid

# upper bound on matches kept and returned per query
max_matches = 1000

# index rotation and file handling options
seamless_rotate = 1
preopen_indexes = 0
unlink_old = 1
}

  • Run indexer --rotate --config /etc/sphinxsearch/sphinx.conf hostel_geo

    • Depending on what you've set the path attribute to in the index block, you might need to run the command with sudo. Also, make sure that the path actually exists.
  • Caveat:
    • It looks like Sphinx needs at least one non-attribute field to be happy.
  • Run searchd
    • searchd --config /etc/sphinxsearch/sphinx.conf
  • PythonClient:

        _lat = 42.6411
        _lng = 18.1085
        c = SphinxClient()
        c.SetServer("localhost", 3312)
        c.SetMatchMode(sphinxapi.SPH_MATCH_ALL)
        c.SetGeoAnchor('lat', 'lng', math.radians(_lat), math.radians(_lng))
        c.SetFilterFloatRange('@geodist asc', 0.0, 1609.0) # 1 mile = 1.61 kilometer
        c.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "@geodist ASC")
        r = c.Query('', 'hostel_geo')