Class: Sphinx::Client

Inherits: Object
Includes: Constants
Defined in: lib/sphinx/client.rb

Overview

The Sphinx Client API is used to communicate with searchd daemon and perform requests.

Examples:

sphinx = Sphinx::Client.new
result = sphinx.query('test')
ids = result['matches'].map { |match| match['id'] }
posts = Post.all :conditions => { :id => ids },
                 :order => "FIELD(id,#{ids.join(',')})"

docs = posts.map(&:body)
excerpts = sphinx.build_excerpts(docs, 'index', 'test')

Constant Summary

Constants included from Constants

Sphinx::Constants::QUERY_FLAGS, Sphinx::Constants::SPH_ATTR_BIGINT, Sphinx::Constants::SPH_ATTR_BOOL, Sphinx::Constants::SPH_ATTR_FACTORS, Sphinx::Constants::SPH_ATTR_FLOAT, Sphinx::Constants::SPH_ATTR_INTEGER, Sphinx::Constants::SPH_ATTR_MULTI, Sphinx::Constants::SPH_ATTR_MULTI64, Sphinx::Constants::SPH_ATTR_ORDINAL, Sphinx::Constants::SPH_ATTR_STRING, Sphinx::Constants::SPH_ATTR_TIMESTAMP, Sphinx::Constants::SPH_FILTER_FLOATRANGE, Sphinx::Constants::SPH_FILTER_RANGE, Sphinx::Constants::SPH_FILTER_VALUES, Sphinx::Constants::SPH_GROUPBY_ATTR, Sphinx::Constants::SPH_GROUPBY_ATTRPAIR, Sphinx::Constants::SPH_GROUPBY_DAY, Sphinx::Constants::SPH_GROUPBY_MONTH, Sphinx::Constants::SPH_GROUPBY_WEEK, Sphinx::Constants::SPH_GROUPBY_YEAR, Sphinx::Constants::SPH_MATCH_ALL, Sphinx::Constants::SPH_MATCH_ANY, Sphinx::Constants::SPH_MATCH_BOOLEAN, Sphinx::Constants::SPH_MATCH_EXTENDED, Sphinx::Constants::SPH_MATCH_EXTENDED2, Sphinx::Constants::SPH_MATCH_FULLSCAN, Sphinx::Constants::SPH_MATCH_PHRASE, Sphinx::Constants::SPH_RANK_BM25, Sphinx::Constants::SPH_RANK_EXPR, Sphinx::Constants::SPH_RANK_FIELDMASK, Sphinx::Constants::SPH_RANK_MATCHANY, Sphinx::Constants::SPH_RANK_NONE, Sphinx::Constants::SPH_RANK_PROXIMITY, Sphinx::Constants::SPH_RANK_PROXIMITY_BM25, Sphinx::Constants::SPH_RANK_SPH04, Sphinx::Constants::SPH_RANK_WORDCOUNT, Sphinx::Constants::SPH_SORT_ATTR_ASC, Sphinx::Constants::SPH_SORT_ATTR_DESC, Sphinx::Constants::SPH_SORT_EXPR, Sphinx::Constants::SPH_SORT_EXTENDED, Sphinx::Constants::SPH_SORT_RELEVANCE, Sphinx::Constants::SPH_SORT_TIME_SEGMENTS

Instance Method Summary

Constructor Details

- (Client) initialize(logger = nil)

Constructs the Sphinx::Client object and sets options to their default values.

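Examples:

Construct a client that logs its activity (a minimal sketch; any Logger-compatible object should work)

require 'logger'

sphinx = Sphinx::Client.new(Logger.new($stdout))
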
Parameters:

  • logger (Logger) (defaults to: nil)

    a logger object to put logs to. No logging will be performed when not set.



# File 'lib/sphinx/client.rb', line 47

def initialize(logger = nil)
  # per-query settings
  @offset        = 0                       # how many records to seek from result-set start (default is 0)
  @limit         = 20                      # how many records to return from result-set starting at offset (default is 20)
  @mode          = SPH_MATCH_ALL           # query matching mode (default is SPH_MATCH_ALL)
  @weights       = []                      # per-field weights (default is 1 for all fields)
  @sort          = SPH_SORT_RELEVANCE      # match sorting mode (default is SPH_SORT_RELEVANCE)
  @sortby        = ''                      # attribute to sort by (default is "")
  @min_id        = 0                       # min ID to match (default is 0, which means no limit)
  @max_id        = 0                       # max ID to match (default is 0, which means no limit)
  @filters       = []                      # search filters
  @groupby       = ''                      # group-by attribute name
  @groupfunc     = SPH_GROUPBY_DAY         # function to pre-process group-by attribute value with
  @groupsort     = '@group desc'           # group-by sorting clause (to sort groups in result set with)
  @groupdistinct = ''                      # group-by count-distinct attribute
  @maxmatches    = 1000                    # max matches to retrieve
  @cutoff        = 0                       # cutoff to stop searching at (default is 0)
  @retrycount    = 0                       # distributed retries count
  @retrydelay    = 0                       # distributed retries delay
  @anchor        = []                      # geographical anchor point
  @indexweights  = []                      # per-index weights
  @ranker        = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
  @rankexpr      = ''                      # ranking expression
  @maxquerytime  = 0                       # max query time, milliseconds (default is 0, do not limit)
  @fieldweights  = {}                      # per-field-name weights
  @overrides     = []                      # per-query attribute values overrides
  @select        = '*'                     # select-list (attributes or expressions, with optional aliases)
  @query_flags   = 0
  @predictedtime = 0
  @outerorderby  = ''
  @outeroffset   = 0
  @outerlimit    = 0
  @hasouter      = false

  # per-reply fields (for single-query case)
  @error         = ''                      # last error message
  @warning       = ''                      # last warning message
  @connerror     = false                   # connection error vs remote error flag

  @reqs          = []                      # requests storage (for multi-query case)
  @mbenc         = ''                      # stored mbstring encoding
  @timeout       = 0                       # connect timeout
  @retries       = 1                       # number of connect retries in case of emergency
  @reqtimeout    = 0                       # request timeout
  @reqretries    = 1                       # number of request retries in case of emergency

  # per-client-object settings
  # searchd servers list
  @servers       = [Sphinx::Server.new(self, 'localhost', 9312, false)].freeze
  @logger        = logger

  logger.info { "[sphinx] version: #{VERSION}, #{@servers.inspect}" } if logger
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method.

- (Object) method_missing(method_id, *arguments, &block) (protected)

Allows the set_ prefix to be omitted for configuration methods called inside a #query block.

Examples:

sphinx.query('test') do
  match_mode :all
  id_range 10, 100
end


# File 'lib/sphinx/client.rb', line 2595

def method_missing(method_id, *arguments, &block)
  if @inside_eval and self.respond_to?("set_#{method_id}")
    self.send("set_#{method_id}", *arguments)
  else
    super
  end
end

Instance Method Details

- (Integer) add_query(query, index = '*', comment = '', log = true) Also known as: AddQuery

Adds an additional query with current settings to the multi-query batch. query is a query string. index is an index name (or names) string. Additionally, if provided, the contents of comment are sent to the query log, marked in square brackets, just before the search terms, which can be very useful for debugging. Currently, this is limited to 128 characters. Returns an index into the results array returned from #run_queries.

Batch queries (or multi-queries) enable searchd to perform internal optimizations if possible. They also reduce network connection overheads and search process creation overheads in all cases. They do not result in any additional overheads compared to simple queries. Thus, if you run several different queries from your web page, you should always consider using multi-queries.

For instance, running the same full-text query but with different sorting or group-by settings will enable searchd to perform expensive full-text search and ranking operation only once, but compute multiple group-by results from its output.

This can be a big saver when you need to display not just plain search results but also some per-category counts, such as the amount of products grouped by vendor. Without multi-query, you would have to run several queries which perform essentially the same search and retrieve the same matches, but create result sets differently. With multi-query, you simply pass all these queries in a single batch and Sphinx optimizes the redundant full-text search internally.

#add_query internally saves full current settings state along with the query, and you can safely change them afterwards for subsequent #add_query calls. Already added queries will not be affected; there's actually no way to change them at all. Here's an example:

sphinx.set_sort_mode(:relevance)
sphinx.add_query("hello world", "documents")

sphinx.set_sort_mode(:attr_desc, :price)
sphinx.add_query("ipod", "products")

sphinx.add_query("harry potter", "books")

results = sphinx.run_queries

With the code above, 1st query will search for "hello world" in "documents" index and sort results by relevance, 2nd query will search for "ipod" in "products" index and sort results by price, and 3rd query will search for "harry potter" in "books" index while still sorting by price. Note that 2nd #set_sort_mode call does not affect the first query (because it's already added) but affects both other subsequent queries.

Additionally, any filters set up before an #add_query will fall through to subsequent queries. So, if #set_filter is called before the first query, the same filter will be in place for the second (and subsequent) queries batched through #add_query unless you call #reset_filters first. Alternatively, you can add additional filters as well.

This would also be true for grouping options and sorting options; no current sorting, filtering, and grouping settings are affected by this call; so subsequent queries will reuse current query settings.

#add_query returns an index into an array of results that will be returned from the #run_queries call. It is simply a sequentially increasing 0-based integer, i.e. the first call will return 0, the second will return 1, and so on. Just a small helper so you won't have to track the indexes manually if you need them.

Examples:

sphinx.add_query('some search text', '*', 'search page')
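
Track the result indexes returned by #add_query (a sketch; the index and query names here are made up)

first  = sphinx.add_query('hello world', 'documents')
second = sphinx.add_query('ipod', 'products')
results = sphinx.run_queries
hello_results = results[first]   # result set of the first query
ipod_results  = results[second]  # result set of the second query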

Parameters:

  • query (String)

    a query string.

  • index (String) (defaults to: '*')

    an index name (or names).

  • comment (String) (defaults to: '')

    a comment to be sent to the query log.

  • log (Boolean) (defaults to: true)

    indicating whether this call should be logged.

Returns:

  • (Integer)

    an index into an array of results that will be returned from #run_queries call.

See Also:



# File 'lib/sphinx/client.rb', line 1570

def add_query(query, index = '*', comment = '', log = true)
  logger.debug { "[sphinx] add_query('#{query}', '#{index}', '#{comment}'), #{self.inspect}" } if log and logger
  # build request

  # mode and limits
  request = Request.new
  request.put_int @query_flags, @offset, @limit, @mode
  # ranker
  request.put_int @ranker
  request.put_string @rankexpr if @ranker == SPH_RANK_EXPR
  # sorting
  request.put_int @sort
  request.put_string @sortby
  # query itself
  request.put_string query
  # weights
  request.put_int_array @weights
  # indexes
  request.put_string index
  # id64 range marker
  request.put_int 1
  # id64 range
  request.put_int64 @min_id.to_i, @max_id.to_i

  # filters
  request.put_int @filters.length
  @filters.each do |filter|
    request.put_string filter['attr']
    request.put_int filter['type']

    case filter['type']
      when SPH_FILTER_VALUES
        request.put_int64_array filter['values']
      when SPH_FILTER_RANGE
        request.put_int64 filter['min'], filter['max']
      when SPH_FILTER_FLOATRANGE
        request.put_float filter['min'], filter['max']
      else
        raise SphinxInternalError, 'Internal error: unhandled filter type'
    end
    request.put_int filter['exclude'] ? 1 : 0
  end

  # group-by clause, max-matches count, group-sort clause, cutoff count
  request.put_int @groupfunc
  request.put_string @groupby
  request.put_int @maxmatches
  request.put_string @groupsort
  request.put_int @cutoff, @retrycount, @retrydelay
  request.put_string @groupdistinct

  # anchor point
  if @anchor.empty?
    request.put_int 0
  else
    request.put_int 1
    request.put_string @anchor['attrlat'], @anchor['attrlong']
    request.put_float @anchor['lat'], @anchor['long']
  end

  # per-index weights
  request.put_int @indexweights.length
  @indexweights.sort_by { |idx, _| idx }.each do |idx, weight|
    request.put_string idx.to_s
    request.put_int weight
  end

  # max query time
  request.put_int @maxquerytime

  # per-field weights
  request.put_int @fieldweights.length
  @fieldweights.sort_by { |idx, _| idx }.each do |field, weight|
    request.put_string field.to_s
    request.put_int weight
  end

  # comment
  request.put_string comment

  # attribute overrides
  request.put_int @overrides.length
  for entry in @overrides do
    request.put_string entry['attr']
    request.put_int entry['type'], entry['values'].size
    entry['values'].each do |id, val|
      request.put_int64 id
      case entry['type']
        when SPH_ATTR_FLOAT
          request.put_float val.to_f
        when SPH_ATTR_BIGINT
          request.put_int64 val.to_i
        else
          request.put_int val.to_i
      end
    end
  end

  # select-list
  request.put_string @select

  # max_predicted_time
  request.put_int @predictedtime if @predictedtime > 0

  # outer select
  request.put_string @outerorderby
  request.put_int @outeroffset, @outerlimit, (@hasouter ? 1 : 0)

  # store request to requests array
  @reqs << request.to_s;
  return @reqs.length - 1
end

- (Array<String>, false) build_excerpts(docs, index, words, opts = {}) Also known as: BuildExcerpts

Excerpts (snippets) builder function. Connects to searchd, asks it to generate excerpts (snippets) from given documents, and returns the results.

docs is a plain array of strings that carry the documents' contents. index is an index name string. Different settings (such as charset, morphology, wordforms) from given index will be used. words is a string that contains the keywords to highlight. They will be processed with respect to index settings. For instance, if English stemming is enabled in the index, "shoes" will be highlighted even if keyword is "shoe". Starting with version 0.9.9-rc1, keywords can contain wildcards, that work similarly to star-syntax available in queries.

Examples:

sphinx.build_excerpts(['hello world', 'hello me'], 'idx', 'hello')
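
Build excerpts with custom highlighting options (a sketch using options documented below; the markup and limit values are arbitrary)

sphinx.build_excerpts(['hello world', 'hello me'], 'idx', 'hello',
                      'before_match' => '<em>', 'after_match' => '</em>',
                      'limit' => 120, 'around' => 3)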

Parameters:

  • docs (Array<String>)

    an array of strings which represent the documents' contents.

  • index (String)

    an index whose settings will be used for stemming, lexing and case folding.

  • words (String)

    a string which contains the words to highlight.

  • opts (Hash) (defaults to: {})

    a Hash which contains additional optional highlighting parameters.

Options Hash (opts):

  • 'before_match' (String) — default: "<b>"

    a string to insert before a keyword match.

  • 'after_match' (String) — default: "</b>"

    a string to insert after a keyword match.

  • 'chunk_separator' (String) — default: " ... "

    a string to insert between snippet chunks (passages).

  • 'limit' (Integer) — default: 256

    maximum snippet size, in symbols (codepoints).

  • 'around' (Integer) — default: 5

    how many words to pick around each matching keywords block.

  • 'exact_phrase' (Boolean) — default: false

    whether to highlight exact query phrase matches only instead of individual keywords.

  • 'single_passage' (Boolean) — default: false

    whether to extract single best passage only.

  • 'use_boundaries' (Boolean) — default: false

    whether to extract passages by phrase boundaries setup in tokenizer.

  • 'weight_order' (Boolean) — default: false

    whether to sort the extracted passages in order of relevance (decreasing weight), or in order of appearance in the document (increasing position).

Returns:

  • (Array<String>, false)

    a plain array of strings with excerpts (snippets) on success; otherwise, false.

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 1868

def build_excerpts(docs, index, words, opts = {})
  raise ArgumentError, '"docs" argument must be Array'   unless docs.kind_of?(Array)
  raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
  raise ArgumentError, '"words" argument must be String' unless words.kind_of?(String)
  raise ArgumentError, '"opts" argument must be Hash'    unless opts.kind_of?(Hash)

  docs.each do |doc|
    raise ArgumentError, '"docs" argument must be Array of Strings' unless doc.kind_of?(String)
  end

  # fixup options
  opts = HashWithIndifferentAccess.new(
    :before_match         => '<b>',
    :after_match          => '</b>',
    :chunk_separator      => ' ... ',
    :limit                => 256,
    :limit_passages       => 0,
    :limit_words          => 0,
    :around               => 5,
    :exact_phrase         => false,
    :single_passage       => false,
    :use_boundaries       => false,
    :weight_order         => false,
    :query_mode           => false,
    :force_all_words      => false,
    :start_passage_id     => 1,
    :load_files           => false,
    :html_strip_mode      => 'index',
    :allow_empty          => false,
    :passage_boundary     => 'none',
    :emit_zones           => false,
    :load_files_scattered => false
  ).update(opts)

  # build request

  # v.1.2 req
  flags  = 1
  flags |= 2    if opts[:exact_phrase]
  flags |= 4    if opts[:single_passage]
  flags |= 8    if opts[:use_boundaries]
  flags |= 16   if opts[:weight_order]
  flags |= 32   if opts[:query_mode]
  flags |= 64   if opts[:force_all_words]
  flags |= 128  if opts[:load_files]
  flags |= 256  if opts[:allow_empty]
  flags |= 512  if opts[:emit_zones]
  flags |= 1024 if opts[:load_files_scattered]

  request = Request.new
  request.put_int 0, flags # mode=0, flags=1 (remove spaces)
  # req index
  request.put_string index.to_s
  # req words
  request.put_string words

  # options
  request.put_string opts[:before_match], opts[:after_match], opts[:chunk_separator]
  request.put_int opts[:limit].to_i, opts[:around].to_i
  request.put_int opts[:limit_passages].to_i, opts[:limit_words].to_i, opts[:start_passage_id].to_i
  request.put_string opts[:html_strip_mode], opts[:passage_boundary]

  # documents
  request.put_int docs.size
  request.put_string(*docs)

  response = perform_request(:excerpt, request)

  # parse response
  docs.map { response.get_string }
end

- (Array<Hash>) build_keywords(query, index, hits) Also known as: BuildKeywords

Extracts keywords from query using tokenizer settings for given index, optionally with per-keyword occurrence statistics. Returns an array of hashes with per-keyword information.

query is a query to extract keywords from. index is a name of the index to get tokenizing settings and keyword occurrence statistics from. hits is a boolean flag that indicates whether keyword occurrence statistics are required.

The result set consists of Hashes with the following keys and values:

'tokenized'

Tokenized keyword.

'normalized'

Normalized keyword.

'docs'

A number of documents where keyword is found (if hits param is true).

'hits'

A number of keyword occurrences among all documents (if hits param is true).

Examples:

keywords = sphinx.build_keywords("this.is.my query", "test1", false)

Parameters:

  • query (String)

    a query string.

  • index (String)

    an index to get tokenizing settings and keyword occurrence statistics from.

  • hits (Boolean)

    indicates whether keyword occurrence statistics are required.

Returns:

  • (Array<Hash>)

    an Array of Hashes in format specified above.

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 1976

def build_keywords(query, index, hits)
  raise ArgumentError, '"query" argument must be String' unless query.kind_of?(String)
  raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
  raise ArgumentError, '"hits" argument must be Boolean' unless hits.kind_of?(TrueClass) or hits.kind_of?(FalseClass)

  # build request
  request = Request.new
  # v.1.0 req
  request.put_string query # req query
  request.put_string index # req index
  request.put_int hits ? 1 : 0

  response = perform_request(:keywords, request)

  # parse response
  nwords = response.get_int
  (0...nwords).map do
    tokenized = response.get_string
    normalized = response.get_string

    entry = HashWithIndifferentAccess.new('tokenized' => tokenized, 'normalized' => normalized)
    entry['docs'], entry['hits'] = response.get_ints(2) if hits

    entry
  end
end

- (Boolean) close Also known as: Close

Closes previously opened persistent connection.

This method can be used only when a single searchd server is configured.

Examples:

begin
  sphinx.open
  # perform several requests
ensure
  sphinx.close
end

Returns:

  • (Boolean)

    true when persistent connection has been closed; otherwise, false.

See Also:



# File 'lib/sphinx/client.rb', line 2265

def close
  if @servers.size > 1
    @error = 'too many servers. persistent socket allowed only for a single server.'
    return false
  end

  unless @servers.first.persistent?
    @error = 'not connected'
    return false;
  end

  @servers.first.close_persistent!
end

- (Boolean) connect_error? Also known as: IsConnectError

Checks whether the last error was a network error on API side, or a remote error reported by searchd. Returns true if the last connection attempt to searchd failed on API side, false otherwise (if the error was remote, or there were no connection attempts at all).

Examples:

puts "Connection failed!" if sphinx.connect_error?

Returns:

  • (Boolean)

    the value indicating whether the last error was a network error on API side.

See Also:



# File 'lib/sphinx/client.rb', line 199

def connect_error?
  @connerror || false
end

- (String) escape_string(string) Also known as: EscapeString

Escapes characters that are treated as special operators by the query language parser.

This function might seem redundant because it's trivial to implement in any calling application. However, as the set of special characters might change over time, it makes sense to have an API call that is guaranteed to escape all such characters at all times.

Examples:

escaped = sphinx.escape_string "escaping-sample@query/string"

Parameters:

  • string (String)

    is a string to escape.

Returns:

  • (String)

    an escaped string.



# File 'lib/sphinx/client.rb', line 2123

def escape_string(string)
  string.to_s.gsub(/([\\()|\-!@~"&\/\^\$=])/, '\\\\\\1')
end

- (Integer) flush_attributes Also known as: FlushAttributes, FlushAttrs, flush_attrs

Force attribute flush, and block until it completes.

Examples:

sphinx.flush_attrs

Returns:

  • (Integer)

    current internal flush tag on success, -1 on failure.



# File 'lib/sphinx/client.rb', line 2187

def flush_attributes
  request = Request.new
  response = perform_request(:flushattrs, request)

  # parse response
  begin
    response.get_int
  rescue EOFError
    @error = 'unexpected response length'
    -1
  end
end

- (Object) inspect

Returns a string representation of the sphinx client object.



# File 'lib/sphinx/client.rb', line 103

def inspect
  params = {
    :error => @error,
    :warning => @warning,
    :connect_error => @connerror,
    :servers => @servers,
    :connect_timeout => { :timeout => @timeout, :retries => @retries },
    :request_timeout => { :timeout => @reqtimeout, :retries => @reqretries },
    :retries => { :count => @retrycount, :delay => @retrydelay },
    :limits => { :offset => @offset, :limit => @limit, :max => @maxmatches, :cutoff => @cutoff },
    :max_query_time => @maxquerytime,
    :overrides => @overrides,
    :select => @select,
    :match_mode => @mode,
    :ranking => { :mode => @ranker, :expression => @rankexpr },
    :sort_mode => { :mode => @sort, :sort_by => @sortby },
    :weights => @weights,
    :field_weights => @fieldweights,
    :index_weights => @indexweights,
    :id_range => { :min => @min_id, :max => @max_id },
    :filters => @filters,
    :geo_anchor => @anchor,
    :group_by => { :attribute => @groupby, :func => @groupfunc, :sort => @groupsort },
    :group_distinct => @groupdistinct,
    :query_flags => { :bitset => @query_flags, :predicted_time => @predictedtime },
    :outer_select => { :has_outer => @hasouter, :sort_by => @outerorderby, :offset => @outeroffset, :limit => @outerlimit},
  }

  "<Sphinx::Client: %d servers, params: %s>" % [@servers.length, params.inspect]
end

- (String) last_error Also known as: GetLastError

Returns last error message, as a string, in human readable format. If there were no errors during the previous API call, empty string is returned.

You should call it when any other function (such as #query) fails (typically, the failing function returns false). The returned string will contain the error description.

The error message is not reset by this call; so you can safely call it several times if needed.

Examples:

puts sphinx.last_error
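
Check the error message after a failed call (a sketch; #query returns false on general error)

result = sphinx.query('test')
puts "Search failed: #{sphinx.last_error}" unless result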

Returns:

  • (String)

    last error message.

See Also:



# File 'lib/sphinx/client.rb', line 156

def last_error
  @error
end

- (String) last_warning Also known as: GetLastWarning

Returns last warning message, as a string, in human readable format. If there were no warnings during the previous API call, empty string is returned.

You should call it to verify whether your request (such as #query) was completed but with warnings. For instance, search query against a distributed index might complete successfully even if several remote agents timed out. In that case, a warning message would be produced.

The warning message is not reset by this call; so you can safely call it several times if needed.

Examples:

puts sphinx.last_warning

Returns:

  • (String)

    last warning message.

See Also:



# File 'lib/sphinx/client.rb', line 180

def last_warning
  @warning
end

- (Boolean) open Also known as: Open

Opens persistent connection to the server.

This method can be used only when a single searchd server is configured.

Examples:

begin
  sphinx.open
  # perform several requests
ensure
  sphinx.close
end

Returns:

  • (Boolean)

    true when persistent connection has been established; otherwise, false.

See Also:



# File 'lib/sphinx/client.rb', line 2225

def open
  if @servers.size > 1
    @error = 'too many servers. persistent socket allowed only for a single server.'
    return false
  end

  if @servers.first.persistent?
    @error = 'already connected'
    return false;
  end

  request = Request.new
  request.put_int(1)

  perform_request(:persist, request, nil) do |server, socket|
    server.make_persistent!(socket)
  end

  true
end

- (Sphinx::Response) perform_request(command, request, additional = nil, server = nil) {|server, socket| ... } (protected)

Connect, send query, get response.

Use this method to communicate with Sphinx server. It ensures connection will be instantiated properly, all headers will be generated properly, etc.

Parameters:

  • command (Symbol, String)

    searchd command to perform (:search, :excerpt, :update, :keywords, :persist, :status, :query, :flushattrs. See SEARCHD_COMMAND_* for details).

  • request (Sphinx::Request)

    contains request body.

  • additional (Integer, Array) (defaults to: nil)

    additional integer or array of integers data to be placed between header and body.

  • server (Sphinx::Server) (defaults to: nil)

    a server to perform the request on. This is a special parameter for internal usage. If specified, the request will be performed on the specified server, and the connection to this server will be attempted only once.

Yields:

  • if a block is given, the response will not be parsed; the plain socket will be yielded instead. This is a special mode used for persistent connections, do not use it for other tasks.

Yield Parameters:

  • server (Sphinx::Server)

    a server where request was performed on.

  • socket (Sphinx::BufferedIO)

    a socket used to perform the request.

Returns:

  • (Sphinx::Response)

    contains response body.

See Also:

  • #parse_response


# File 'lib/sphinx/client.rb', line 2306

def perform_request(command, request, additional = nil, server = nil)
  if server
    attempts = 1
  else
    server = case request
      when String
        Zlib.crc32(request)
      when Request
        request.crc32
      else
        raise ArgumentError, "request argument must be String or Sphinx::Request"
    end
    attempts = nil
  end

  with_server(server, attempts) do |srv|
    logger.info { "[sphinx] #{command} on server #{srv}" } if logger

    cmd = command.to_s.upcase
    command_id = Sphinx::Client.const_get("SEARCHD_COMMAND_#{cmd}")
    command_ver = Sphinx::Client.const_get("VER_COMMAND_#{cmd}")

    with_socket(srv) do |socket|
      additional = Array(additional)
      len = request.to_s.length + (additional.size * 4)
      header = [command_id, command_ver, len].pack('nnN')
      header << additional.pack('N' * additional.size) unless additional.empty?

      socket.write(header + request.to_s)

      if block_given?
        yield srv, socket
      else
        parse_response(socket, command_ver)
      end
    end
  end
end

- (Hash, false) query(query, index = '*', comment = '') {|Client| ... } Also known as: Query

Connects to searchd server, runs given search query with current settings, obtains and returns the result set.

query is a query string. index is an index name (or names) string. Returns false and sets #last_error message on general error. Returns search result set on success. Additionally, the contents of comment are sent to the query log, marked in square brackets, just before the search terms, which can be very useful for debugging. Currently, the comment is limited to 128 characters.

Default value for index is "*" which means to query all local indexes. Characters allowed in index names include Latin letters (a-z), numbers (0-9), minus sign (-), and underscore (_); everything else is considered a separator. Therefore, all of the following sample calls are valid and will search the same two indexes:

sphinx.query('test query', 'main delta')
sphinx.query('test query', 'main;delta')
sphinx.query('test query', 'main, delta');

Index specification order matters. If documents with identical IDs are found in two or more indexes, weight and attribute values from the very last matching index will be used for sorting and returning to client (unless explicitly overridden with #set_index_weights). Therefore, in the example above, matches from "delta" index will always win over matches from "main".

On success, #query returns a result set that contains some of the found matches (as requested by #set_limits) and additional general per-query statistics. The result set is a Hash with the following keys and values:

"matches"

Array with small Hashes containing document weight and attribute values.

"total"

Total amount of matches retrieved on server (ie. to the server side result set) by this query. You can retrieve up to this amount of matches from server for this query text with current query settings.

"total_found"

Total amount of matching documents in index (that were found and processed on server).

"words"

Hash which maps query keywords (case-folded, stemmed, and otherwise processed) to a small Hash with per-keyword statistics ("docs", "hits").

"error"

Query error message reported by searchd (string, human readable). Empty if there were no errors.

"warning"

Query warning message reported by searchd (string, human readable). Empty if there were no warnings.

Please note: you can use both strings and symbols as Hash keys.

It should be noted that #query carries out the same actions as #add_query and #run_queries without the intermediate steps; it is analogous to a single #add_query call, followed by a corresponding #run_queries, then returning the first array element of matches (from the first, and only, query.)

Examples:

Regular query with previously set filters

sphinx.query('some search text', '*', 'search page')

Query with block

sphinx.query('test') do |sphinx|
  sphinx.set_match_mode :all
  sphinx.set_id_range 10, 100
end

Query with instant filters configuring

sphinx.query('test') do
  match_mode :all
  id_range 10, 100
end
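
Process the result set (a sketch based on the result keys described above)

result = sphinx.query('test')
if result
  puts "#{result['total_found']} documents found, #{result['total']} retrieved"
  result['matches'].each do |match|
    puts "id=#{match['id']} weight=#{match['weight']} attrs=#{match['attrs'].inspect}"
  end
end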

Parameters:

  • query (String)

    a query string.

  • index (String) (defaults to: '*')

    an index name (or names).

  • comment (String) (defaults to: '')

    a comment to be sent to the query log.

Yields:

  • (Client)

    yields just before the query is performed. Useful for setting filters or sorting. When the block does not accept any parameters, it is evaluated inside the Sphinx::Client instance itself; in this case you can omit the set_ prefix for configuration methods.

Yield Parameters:

Returns:

  • (Hash, false)

    result set described above or false on error.

See Also:



# File 'lib/sphinx/client.rb', line 1454

def query(query, index = '*', comment = '', &block)
  @reqs = []

  if block_given?
    if block.arity > 0
      yield self
    else
      begin
        @inside_eval = true
        instance_eval(&block)
      ensure
        @inside_eval = false
      end
    end
  end

  logger.debug { "[sphinx] query('#{query}', '#{index}', '#{comment}'), #{self.inspect}" } if logger

  self.add_query(query, index, comment, false)
  results = self.run_queries

  # probably network error; error message should be already filled
  return false unless results.instance_of?(Array)

  @error = results[0]['error']
  @warning = results[0]['warning']

  return false if results[0]['status'] == SEARCHD_ERROR
  return results[0]
end

- (Sphinx::Client) reset_filters Also known as: ResetFilters

Clears all currently set filters.

This call is only normally required when using multi-queries. You might want to set different filters for different queries in the batch. To do that, you should call #reset_filters and add new filters using the respective calls.

Examples:

sphinx.reset_filters

Returns:

See Also:



# File 'lib/sphinx/client.rb', line 1293

def reset_filters
  @filters = []
  @anchor = []
  self
end

- (Sphinx::Client) reset_group_by Also known as: ResetGroupBy

Clears all currently set group-by settings, and disables group-by.

This call is only normally required when using multi-queries. You can change individual group-by settings using #set_group_by and #set_group_distinct calls, but you can not disable group-by using those calls. #reset_group_by fully resets previous group-by settings and disables group-by mode in the current state, so that subsequent #add_query calls can perform non-grouping searches.

Examples:

sphinx.reset_group_by

Returns:

See Also:



# File 'lib/sphinx/client.rb', line 1317

def reset_group_by
  @groupby       = ''
  @groupfunc     = SPH_GROUPBY_DAY
  @groupsort     = '@group desc'
  @groupdistinct = ''
  self
end

- (Object) reset_outer_select Also known as: ResetOuterSelect



# File 'lib/sphinx/client.rb', line 1353

def reset_outer_select
  @outerorderby = ''
  @outeroffset = 0
  @outerlimit = 0
  @hasouter = 0
  self
end

- (Sphinx::Client) reset_overrides Also known as: ResetOverrides

Clear all attribute value overrides (for multi-queries).

This call is only normally required when using multi-queries. You might want to set different attribute overrides for different queries in the batch. To do that, you should call #reset_overrides and add new overrides using the respective calls.

Examples:

sphinx.reset_overrides

Returns:

See Also:



# File 'lib/sphinx/client.rb', line 1340

def reset_overrides
  @overrides = []
  self
end

- (Object) reset_query_flag Also known as: ResetQueryFlag



# File 'lib/sphinx/client.rb', line 1346

def reset_query_flag
  @query_flags = 0
  @predictedtime = 0
  self
end

- (Array<Hash>) run_queries Also known as: RunQueries

Connects to searchd, runs a batch of all queries added using #add_query, obtains and returns the result sets. Returns false and sets #last_error message on general error (such as network I/O failure). Returns a plain array of result sets on success.

Each result set in the returned array is exactly the same as the result set returned from #query.

Note that the batch query request itself almost always succeeds, unless there's a network error, blocking index rotation in progress, or another general failure which prevents the whole request from being processed.

However, individual queries within the batch might very well fail. In this case their respective result sets will contain a non-empty "error" message, but no matches or query statistics. In the extreme case all queries within the batch could fail. There still will be no general error reported, because the API was able to successfully connect to searchd, submit the batch, and receive the results, but every result set will have a specific error message.

Examples:

sphinx.add_query('some search text', '*', 'search page')
results = sphinx.run_queries
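
Check each result set for per-query errors (a sketch based on the "error" key described above)

results = sphinx.run_queries
if results
  results.each_with_index do |result, i|
    if result['error'].empty?
      puts "query ##{i}: #{result['total_found']} found"
    else
      puts "query ##{i} failed: #{result['error']}"
    end
  end
end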

Returns:

  • (Array<Hash>)

    an Array of Hashes which are exactly the same as the result set returned from #query.

See Also:



# File 'lib/sphinx/client.rb', line 1717

def run_queries
  logger.debug { "[sphinx] run_queries(#{@reqs.length} queries)" } if logger
  if @reqs.empty?
    @error = 'No queries defined, issue add_query() first'
    return false
  end

  reqs, nreqs = @reqs.join(''), @reqs.length
  @reqs = []
  response = perform_request(:search, reqs, [0, nreqs])

  # parse response
  (1..nreqs).map do
    result = HashWithIndifferentAccess.new(:error => '', :warning => '')

    # extract status
    status = result[:status] = response.get_int
    if status != SEARCHD_OK
      message = response.get_string
      if status == SEARCHD_WARNING
        result[:warning] = message
      else
        result[:error] = message
        next result
      end
    end

    # read schema
    nfields = response.get_int
    result[:fields] = (1..nfields).map { response.get_string }

    attrs_names_in_order = []
    nattrs = response.get_int
    attrs = nattrs.times.inject(HashWithIndifferentAccess.new) do |hash, idx|
      name, type = response.get_string, response.get_int
      hash[name] = type
      attrs_names_in_order << name
      hash
    end
    result[:attrs] = attrs

    # read match count
    count, id64 = response.get_ints(2)

    # read matches
    result[:matches] = (1..count).map do
      doc = id64 == 0 ? response.get_int : response.get_int64
      weight = response.get_int

      # This is a single result put in the result['matches'] array
      match = HashWithIndifferentAccess.new(:id => doc, :weight => weight)
      match[:attrs] = attrs_names_in_order.inject(HashWithIndifferentAccess.new) do |hash, name|
        hash[name] = case attrs[name]
          when SPH_ATTR_BIGINT
            # handle 64-bit ints
            response.get_int64
          when SPH_ATTR_FLOAT
            # handle floats
            response.get_float
          when SPH_ATTR_STRING
            # handle string
            response.get_string
          when SPH_ATTR_FACTORS
            # ???
            response.get_int
          when SPH_ATTR_MULTI
            # handle array of integers
            val = response.get_int
            response.get_ints(val) if val > 0
          when SPH_ATTR_MULTI64
            # handle array of 64-bit integers
            val = response.get_int
            (val / 2).times.map { response.get_int64 }
          else
            # handle everything else as unsigned ints
            response.get_int
        end
        hash
      end
      match
    end
    result[:total], result[:total_found], msecs = response.get_ints(3)
    result[:time] = '%.3f' % (msecs / 1000.0)

    nwords = response.get_int
    result[:words] = nwords.times.inject({}) do |hash, idx|
      word = response.get_string
      docs, hits = response.get_ints(2)
      hash[word] = HashWithIndifferentAccess.new(:docs => docs, :hits => hits)
      hash
    end

    result
  end
end

- (Object) set_bit(bitset, index, value) (protected)

Sets or resets given bit in a bitset.

Parameters:

  • bitset (Integer)

    integer value to set bit in.

  • index (Integer)

    integer offset of the bit to set.

  • value (Boolean, Integer)

    value to set bit into (true, false, 0, or 1).



# File 'lib/sphinx/client.rb', line 2577

def set_bit(bitset, index, value)
  bit = 1 << index
  if value == true || value == 1
    bitset |= bit
  elsif bitset & bit > 0
    bitset ^= bit
  end
  bitset
end

- (Sphinx::Client) set_connect_timeout(timeout, retries = 1) Also known as: SetConnectTimeout

Sets the time allowed to spend connecting to the server before giving up and number of retries to perform.

In the event of a failure to connect, an appropriate error code should be returned back to the application in order for application-level error handling to advise the user.

When multiple servers are configured through the #set_servers method and the retries number is greater than 1, the library will try to connect to another server. When a single server is configured, it will try to reconnect retries times.

Please note, this timeout will only be used for establishing the connection, not for regular API requests.

Examples:

Set connection timeout to 1 second and number of retries to 5

sphinx.set_connect_timeout(1, 5)

Parameters:

  • timeout (Integer)

    a connection timeout in seconds.

  • retries (Integer) (defaults to: 1)

    number of connect retries.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 333

def set_connect_timeout(timeout, retries = 1)
  raise ArgumentError, '"timeout" argument must be Integer'        unless timeout.kind_of?(Integer)
  raise ArgumentError, '"retries" argument must be Integer'        unless retries.kind_of?(Integer)
  raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0

  @timeout = timeout
  @retries = retries
  self
end

- (Sphinx::Client) set_field_weights(weights) Also known as: SetFieldWeights

Binds per-field weights by name. Parameter must be a Hash mapping string field names to integer weights.

Match ranking can be affected by per-field weights. For instance, see Section 4.4, "Weighting" for an explanation of how phrase proximity ranking is affected. This call lets you specify what non-default weights to assign to different full-text fields.

The weights must be positive 32-bit integers. The final weight will be a 32-bit integer too. Default weight value is 1. Unknown field names will be silently ignored.

There is no enforced limit on the maximum weight value at the moment. However, beware that if you set it too high you can start hitting 32-bit wraparound issues. For instance, if you set a weight of 10,000,000 and search in extended mode, then the maximum possible weight will be equal to 10 million (your weight) multiplied by 1 thousand (internal BM25 scaling factor, see Section 4.4, "Weighting") multiplied by 1 or more (phrase proximity rank). The result is at least 10 billion, which does not fit in 32 bits and will be wrapped around, producing unexpected results.

Examples:

sphinx.set_field_weights(:title => 20, :text => 10)

Parameters:

  • weights (Hash)

    a Hash mapping string field names to integer weights.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 882

def set_field_weights(weights)
  raise ArgumentError, '"weights" argument must be Hash' unless weights.kind_of?(Hash)
  weights.each do |name, weight|
    unless (name.kind_of?(String) or name.kind_of?(Symbol)) and weight.kind_of?(Integer)
      raise ArgumentError, '"weights" argument must be Hash map of strings to integers'
    end
  end

  @fieldweights = weights
  self
end

- (Sphinx::Client) set_filter(attribute, values, exclude = false) Also known as: SetFilter

Adds new integer values set filter.

On this call, an additional new filter is added to the existing list of filters. attribute must be a string with the attribute name. values must be a plain array containing integer values. exclude must be a boolean value; it controls whether to accept the matching documents (default mode, when exclude is false) or reject them.

Only those documents where attribute column value stored in the index matches any of the values from values array will be matched (or rejected, if exclude is true).

Examples:

sphinx.set_filter(:group_id, [10, 15, 20])
sphinx.set_filter(:group_id, [10, 15, 20], true)

Parameters:

  • attribute (String, Symbol)

    an attribute name to filter by.

  • values (Array<Integer>, Integer)

    an Array of integers or single Integer with given attribute values.

  • exclude (Boolean) (defaults to: false)

    indicating whether documents with given attribute matching specified values should be excluded from search results.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 1004

def set_filter(attribute, values, exclude = false)
  raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
  values = [values] if values.kind_of?(Integer)
  raise ArgumentError, '"values" argument must be Array'               unless values.kind_of?(Array)
  raise ArgumentError, '"values" argument must be Array of Integers'   unless values.all? { |v| v.kind_of?(Integer) }
  raise ArgumentError, '"exclude" argument must be Boolean'            unless [TrueClass, FalseClass].include?(exclude.class)

  if values.any?
    @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute.to_s, 'exclude' => exclude, 'values' => values }
  end
  self
end

- (Sphinx::Client) set_filter_float_range(attribute, min, max, exclude = false) Also known as: SetFilterFloatRange

Adds new float range filter.

On this call, additional new filter is added to the existing list of filters. attribute must be a string with attribute name. min and max must be floats that define the acceptable attribute values range (including the boundaries). exclude must be a boolean value; it controls whether to accept the matching documents (default mode, when exclude is false) or reject them.

Only those documents where attribute column value stored in the index is between min and max (including values that are exactly equal to min or max) will be matched (or rejected, if exclude is true).

Examples:

sphinx.set_filter_float_range(:group_id, 10.5, 20)
sphinx.set_filter_float_range(:group_id, 10.5, 20, true)

Parameters:

  • attribute (String, Symbol)

    an attribute name to filter by.

  • min (Numeric)

    min value of the given attribute.

  • max (Numeric)

    max value of the given attribute.

  • exclude (Boolean) (defaults to: false)

    indicating whether documents with given attribute matching specified boundaries should be excluded from search results.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 1093

def set_filter_float_range(attribute, min, max, exclude = false)
  raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
  raise ArgumentError, '"min" argument must be Numeric'                unless min.kind_of?(Numeric)
  raise ArgumentError, '"max" argument must be Numeric'                unless max.kind_of?(Numeric)
  raise ArgumentError, '"max" argument greater or equal to "min"'      unless min <= max
  raise ArgumentError, '"exclude" argument must be Boolean'            unless exclude.kind_of?(TrueClass) or exclude.kind_of?(FalseClass)

  @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute.to_s, 'exclude' => exclude, 'min' => min.to_f, 'max' => max.to_f }
  self
end

- (Sphinx::Client) set_filter_range(attribute, min, max, exclude = false) Also known as: SetFilterRange

Adds new integer range filter.

On this call, additional new filter is added to the existing list of filters. attribute must be a string with attribute name. min and max must be integers that define the acceptable attribute values range (including the boundaries). exclude must be a boolean value; it controls whether to accept the matching documents (default mode, when exclude is false) or reject them.

Only those documents where attribute column value stored in the index is between min and max (including values that are exactly equal to min or max) will be matched (or rejected, if exclude is true).

Examples:

sphinx.set_filter_range(:group_id, 10, 20)
sphinx.set_filter_range(:group_id, 10, 20, true)

Parameters:

  • attribute (String, Symbol)

    an attribute name to filter by.

  • min (Integer)

    min value of the given attribute.

  • max (Integer)

    max value of the given attribute.

  • exclude (Boolean) (defaults to: false)

    indicating whether documents with given attribute matching specified boundaries should be excluded from search results.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 1050

def set_filter_range(attribute, min, max, exclude = false)
  raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
  raise ArgumentError, '"min" argument must be Integer'                unless min.kind_of?(Integer)
  raise ArgumentError, '"max" argument must be Integer'                unless max.kind_of?(Integer)
  raise ArgumentError, '"max" argument greater or equal to "min"'      unless min <= max
  raise ArgumentError, '"exclude" argument must be Boolean'            unless exclude.kind_of?(TrueClass) or exclude.kind_of?(FalseClass)

  @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute.to_s, 'exclude' => exclude, 'min' => min, 'max' => max }
  self
end

- (Sphinx::Client) set_geo_anchor(attrlat, attrlong, lat, long) Also known as: SetGeoAnchor

Sets anchor point for geosphere distance (geodistance) calculations, and enables them.

attrlat and attrlong must be strings that contain the names of latitude and longitude attributes, respectively. lat and long are floats that specify anchor point latitude and longitude, in radians.

Once an anchor point is set, you can use the magic "@geodist" attribute name in your filters and/or sorting expressions. Sphinx will compute the geosphere distance between the given anchor point and a point specified by the latitude and longitude attributes from each full-text match, and attach this value to the resulting match. The latitude and longitude values both in #set_geo_anchor and the index attribute data are expected to be in radians. The result will be returned in meters, so a geodistance value of 1000.0 means 1 km. 1 mile is approximately 1609.344 meters.

Examples:

sphinx.set_geo_anchor(:latitude, :longitude, 192.5, 143.5)
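
Filter matches by distance from the anchor point (a sketch; anchor coordinates are in radians, "@geodist" values are in meters)

sphinx.set_geo_anchor(:latitude, :longitude, 0.6591, -2.1365)
sphinx.set_filter_float_range('@geodist', 0.0, 5_000.0)  # within 5 km of the anchor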

Parameters:

  • attrlat (String, Symbol)

    a name of latitude attribute.

  • attrlong (String, Symbol)

    a name of longitude attribute.

  • lat (Numeric)

    an anchor point latitude, in radians.

  • long (Numeric)

    an anchor point longitude, in radians.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 1136

def set_geo_anchor(attrlat, attrlong, lat, long)
  raise ArgumentError, '"attrlat" argument must be String or Symbol'  unless attrlat.kind_of?(String)  or attrlat.kind_of?(Symbol)
  raise ArgumentError, '"attrlong" argument must be String or Symbol' unless attrlong.kind_of?(String) or attrlong.kind_of?(Symbol)
  raise ArgumentError, '"lat" argument must be Numeric'               unless lat.kind_of?(Numeric)
  raise ArgumentError, '"long" argument must be Numeric'              unless long.kind_of?(Numeric)

  @anchor = { 'attrlat' => attrlat.to_s, 'attrlong' => attrlong.to_s, 'lat' => lat.to_f, 'long' => long.to_f }
  self
end

- (Sphinx::Client) set_group_by(attribute, func, groupsort = '@group desc') Also known as: SetGroupBy

Sets grouping attribute, function, and groups sorting mode; and enables grouping (as described in Section 4.6, "Grouping (clustering) search results").

attribute is a string that contains group-by attribute name. func is a constant that chooses a function applied to the attribute value in order to compute group-by key. groupsort is a clause that controls how the groups will be sorted. Its syntax is similar to that described in Section 4.5, "SPH_SORT_EXTENDED mode".

Grouping feature is very similar in nature to GROUP BY clause from SQL. Results produced by this function call are going to be the same as produced by the following pseudo code:

SELECT ... GROUP BY func(attribute) ORDER BY groupsort

Note that it's groupsort that affects the order of matches in the final result set. Sorting mode (see #set_sort_mode) affects the ordering of matches within a group, i.e. which match will be selected as the best one from the group. So you can, for instance, order the groups by matches count and select the most relevant match within each group at the same time.

Starting with version 0.9.9-rc2, aggregate functions (AVG(), MIN(), MAX(), SUM()) are supported through #set_select API call when using GROUP BY.

You can specify group function and attribute as String ("attr", "day", etc), Symbol (:attr, :day, etc), or Fixnum constant (SPH_GROUPBY_ATTR, SPH_GROUPBY_DAY, etc).

Examples:

sphinx.set_group_by(:tag_id, :attr)
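
Group by day and order groups by match count (a sketch; the "published_at" attribute is hypothetical)

sphinx.set_group_by(:published_at, :day, '@count desc')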

Parameters:

  • attribute (String, Symbol)

    an attribute name to group by.

  • func (Integer, String, Symbol)

    a grouping function.

  • groupsort (String) (defaults to: '@group desc')

    a groups sorting mode.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 1199

def set_group_by(attribute, func, groupsort = '@group desc')
  raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String)  or attribute.kind_of?(Symbol)
  raise ArgumentError, '"groupsort" argument must be String'           unless groupsort.kind_of?(String)

  case func
    when String, Symbol
      begin
        func = self.class.const_get("SPH_GROUPBY_#{func.to_s.upcase}")
      rescue NameError
        raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid"
      end
    when Fixnum
      raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid" unless (SPH_GROUPBY_DAY..SPH_GROUPBY_ATTRPAIR).include?(func)
    else
      raise ArgumentError, '"func" argument must be Fixnum, String, or Symbol'
  end

  @groupby = attribute.to_s
  @groupfunc = func
  @groupsort = groupsort
  self
end

- (Sphinx::Client) set_group_distinct(attribute) Also known as: SetGroupDistinct

Sets attribute name for per-group distinct values count calculations. Only available for grouping queries.

attribute is a string that contains the attribute name. For each group, all values of this attribute will be stored (as RAM limits permit), then the number of distinct values will be calculated and returned to the client. This feature is similar to the COUNT(DISTINCT) clause in standard SQL; so these Sphinx calls:

sphinx.set_group_by(:category, :attr, '@count desc')
sphinx.set_group_distinct(:vendor)

can be expressed using the following SQL clauses:

SELECT id, weight, all-attributes,
  COUNT(DISTINCT vendor) AS @distinct,
  COUNT(*) AS @count
FROM products
GROUP BY category
ORDER BY @count DESC

In the sample pseudo code shown just above, the #set_group_distinct call corresponds to the COUNT(DISTINCT vendor) clause only. The GROUP BY, ORDER BY, and COUNT(*) clauses are all equivalents of the #set_group_by settings. Both queries will return one matching row for each category. In addition to indexed attributes, matches will also contain the total per-category match count, and the count of distinct vendor IDs within each category.

Examples:

sphinx.set_group_distinct(:category_id)
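
A sketch of reading the per-group counts back (the exact result layout follows the #query result hash; "@groupby", "@count", and "@distinct" are the magic attribute names Sphinx attaches to grouped matches):

sphinx.set_group_by(:category, :attr, '@count desc')
sphinx.set_group_distinct(:vendor)
result = sphinx.query('test')
result['matches'].each do |match|
  attrs = match['attrs']
  puts "category=#{attrs['@groupby']} matches=#{attrs['@count']} vendors=#{attrs['@distinct']}"
end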

Parameters:

  • attribute (String, Symbol)

    an attribute name.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 1265

def set_group_distinct(attribute)
  raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String)  or attribute.kind_of?(Symbol)

  @groupdistinct = attribute.to_s
  self
end

- (Sphinx::Client) set_id_range(min, max) Also known as: SetIDRange

Sets an accepted range of document IDs. Parameters must be integers. Defaults are 0 and 0; that combination means to not limit by range.

After this call, only those records that have document ID between min and max (including IDs exactly equal to min or max) will be matched.

Examples:

sphinx.set_id_range(10, 1000)

Parameters:

  • min (Integer)

    min document ID.

  • max (Integer)

    max document ID.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 963

def set_id_range(min, max)
  raise ArgumentError, '"min" argument must be Integer' unless min.kind_of?(Integer)
  raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
  raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max

  @min_id = min
  @max_id = max
  self
end

- (Sphinx::Client) set_index_weights(weights) Also known as: SetIndexWeights

Sets per-index weights, and enables weighted summing of match weights across different indexes. Parameter must be a hash (associative array) mapping string index names to integer weights. Default is an empty hash, which means weight summing is disabled.

When a match with the same document ID is found in several different local indexes, by default Sphinx simply chooses the match from the index specified last in the query. This is to support searching through partially overlapping index partitions.

However, in some cases the indexes are not just partitions, and you might want to sum the weights across the indexes instead of picking one. #set_index_weights lets you do that. With summing enabled, the final match weight in the result set will be computed as a sum of the match weight coming from a given index multiplied by the respective per-index weight specified in this call. I.e. if document 123 is found in index A with a weight of 2, and also in index B with a weight of 3, and you called #set_index_weights with {"A"=>100, "B"=>10}, the final weight returned to the client will be 2*100+3*10 = 230.

Examples:

sphinx.set_index_weights(:fresh => 20, :archived => 10)
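
To mirror the arithmetic described above (a sketch; the second argument of #query lists the indexes to search):

sphinx.set_index_weights('A' => 100, 'B' => 10)
result = sphinx.query('test', 'A B')
# a document found in A with weight 2 and in B with weight 3 gets 2*100 + 3*10 = 230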

Parameters:

  • weights (Hash)

    a Hash mapping string index names to integer weights.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 928

def set_index_weights(weights)
  raise ArgumentError, '"weights" argument must be Hash' unless weights.kind_of?(Hash)
  weights.each do |index, weight|
    unless (index.kind_of?(String) or index.kind_of?(Symbol)) and weight.kind_of?(Integer)
      raise ArgumentError, '"weights" argument must be Hash map of strings to integers'
    end
  end

  @indexweights = weights
  self
end

- (Sphinx::Client) set_limits(offset, limit, max = 0, cutoff = 0) Also known as: SetLimits

Sets the offset into the server-side result set (offset) and the amount of matches to return to the client starting from that offset (limit). Can additionally control the maximum server-side result set size for the current query (max_matches) and the threshold amount of matches to stop searching at (cutoff). All parameters must be non-negative integers.

The first two parameters to #set_limits are identical in behavior to the MySQL LIMIT clause. They instruct searchd to return at most limit matches starting from match number offset. The default offset and limit settings are 0 and 20, that is, to return the first 20 matches.

The max_matches setting controls how many matches searchd will keep in RAM while searching. All matching documents will still be processed, ranked, filtered, and sorted even if max_matches is set to 1. But only the best N documents are stored in memory at any given moment for performance and RAM usage reasons, and this setting controls that N. Note that there are two places where the max_matches limit is enforced. The per-query limit is controlled by this API call, but there is also a per-server limit controlled by the max_matches setting in the config file. To prevent RAM usage abuse, the server will not allow setting the per-query limit higher than the per-server limit.

You can't retrieve more than max_matches matches to the client application. The default limit is set to 1000. Normally, you should not need to go over this limit: one thousand records is enough to present to the end user. And if you're thinking about pulling the results into the application for further sorting or filtering, that would be much more efficient if performed on the Sphinx side.

The cutoff setting is intended for advanced performance control. It tells searchd to forcibly stop the search query once cutoff matches have been found and processed.

Examples:

sphinx.set_limits(100, 50, 1000, 5000)
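
A common pagination sketch (page and per_page are hypothetical application variables):

page, per_page = 3, 20
sphinx.set_limits((page - 1) * per_page, per_page)
result = sphinx.query('test')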

Parameters:

  • offset (Integer)

    an offset into server-side result set.

  • limit (Integer)

    an amount of matches to return.

  • max (Integer) (defaults to: 0)

    a maximum server-side result set size.

  • cutoff (Integer) (defaults to: 0)

    a threshold amount of matches to stop searching at.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.



# File 'lib/sphinx/client.rb', line 459

def set_limits(offset, limit, max = 0, cutoff = 0)
  raise ArgumentError, '"offset" argument must be Integer' unless offset.kind_of?(Integer)
  raise ArgumentError, '"limit" argument must be Integer'  unless limit.kind_of?(Integer)
  raise ArgumentError, '"max" argument must be Integer'    unless max.kind_of?(Integer)
  raise ArgumentError, '"cutoff" argument must be Integer' unless cutoff.kind_of?(Integer)

  raise ArgumentError, '"offset" argument should be greater or equal to zero' unless offset >= 0
  raise ArgumentError, '"limit" argument should be greater to zero'           unless limit > 0
  raise ArgumentError, '"max" argument should be greater or equal to zero'    unless max >= 0
  raise ArgumentError, '"cutoff" argument should be greater or equal to zero' unless cutoff >= 0

  @offset = offset
  @limit = limit
  @maxmatches = max if max > 0
  @cutoff = cutoff if cutoff > 0
  self
end

- (Sphinx::Client) set_match_mode(mode) Also known as: SetMatchMode

Sets full-text query matching mode.

Parameter must be a Fixnum constant specifying one of the known modes (SPH_MATCH_ALL, SPH_MATCH_ANY, etc), a String identifier ("all", "any", etc), or a Symbol (:all, :any, etc).

Examples:

sphinx.set_match_mode(Sphinx::SPH_MATCH_ALL)
sphinx.set_match_mode(:all)
sphinx.set_match_mode('all')

Parameters:

  • mode (Integer, String, Symbol)

    full-text query matching mode.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 698

def set_match_mode(mode)
  case mode
    when String, Symbol
      begin
        mode = self.class.const_get("SPH_MATCH_#{mode.to_s.upcase}")
      rescue NameError
        raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
      end
    when Fixnum
      raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_MATCH_ALL..SPH_MATCH_EXTENDED2).include?(mode)
    else
      raise ArgumentError, '"mode" argument must be Fixnum, String, or Symbol'
  end

  @mode = mode
  self
end

- (Sphinx::Client) set_max_query_time(max) Also known as: SetMaxQueryTime

Sets the maximum search query time, in milliseconds. Parameter must be a non-negative integer. Default value is 0, which means "do not limit".

Similar to cutoff setting from #set_limits, but limits elapsed query time instead of processed matches count. Local search queries will be stopped once that much time has elapsed. Note that if you're performing a search which queries several local indexes, this limit applies to each index separately.

Examples:

sphinx.set_max_query_time(200)

Parameters:

  • max (Integer)

    maximum search query time in milliseconds.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.



# File 'lib/sphinx/client.rb', line 495

def set_max_query_time(max)
  raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
  raise ArgumentError, '"max" argument should be greater or equal to zero' unless max >= 0

  @maxquerytime = max
  self
end

- (Object) set_outer_select(orderby, offset, limit) Also known as: SetOuterSelect
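Sets the outer select clause: an outer ORDER BY expression, offset, and limit applied on top of the inner result set. This description is inferred from the parameter names and from the nested-SELECT (outer select) feature of SphinxQL; treat the sketch below as an illustration only, with "price" being a hypothetical attribute. It takes the 100 most relevant matches and returns the 20 cheapest of them:

sphinx.set_limits(0, 100)
sphinx.set_outer_select('price ASC', 0, 20)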

Raises:

  • (ArgumentError)


# File 'lib/sphinx/client.rb', line 659

def set_outer_select(orderby, offset, limit)
  raise ArgumentError, '"orderby" argument must be String' unless orderby.kind_of?(String)
  raise ArgumentError, '"offset" argument must be Integer' unless offset.kind_of?(Integer)
  raise ArgumentError, '"limit" argument must be Integer'  unless limit.kind_of?(Integer)

  raise ArgumentError, '"offset" argument should be greater or equal to zero' unless offset >= 0
  raise ArgumentError, '"limit" argument should be greater to zero'           unless limit > 0

  @outerorderby = orderby
  @outeroffset = offset
  @outerlimit = limit
  @hasouter = true
  self
end

- (Sphinx::Client) set_override(attribute, attrtype, values) Also known as: SetOverride

Sets temporary (per-query) per-document attribute value overrides. Only supports scalar attributes. values must be a Hash that maps document IDs to overridden attribute values.

The override feature lets you "temporarily" update attribute values for some documents within a single query, leaving all other queries unaffected. This might be useful for personalized data. For example, assume you're implementing a personalized search function that wants to boost the posts that the user's friends recommend. Such data is not just dynamic, but also personal; so you can't simply put it in the index because you don't want everyone's searches affected. Overrides, on the other hand, are local to a single query and invisible to everyone else. So you can, say, set up a "friends_weight" value for every document, defaulting to 0, then temporarily override it with 1 for documents 123, 456 and 789 (recommended by the current user's friends), and use that value when ranking.

You can specify attribute type as String ("integer", "float", etc), Symbol (:integer, :float, etc), or Fixnum constant (SPH_ATTR_INTEGER, SPH_ATTR_FLOAT, etc).

Examples:

sphinx.set_override(:friends_weight, :integer, {123 => 1, 456 => 1, 789 => 1})
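
A fuller sketch of the "friends_weight" scenario described above (the attribute and alias names are hypothetical), using the overridden value in ranking via #set_select and #set_sort_mode:

sphinx.set_override(:friends_weight, :integer, { 123 => 1, 456 => 1, 789 => 1 })
sphinx.set_select('*, @weight + friends_weight * 100 AS personalized_weight')
sphinx.set_sort_mode(:extended, 'personalized_weight DESC')
result = sphinx.query('test')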

Parameters:

  • attribute (String, Symbol)

    an attribute name to override values of.

  • attrtype (Integer, String, Symbol)

    attribute type.

  • values (Hash)

    a Hash that maps document IDs to overridden attribute values.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 536

def set_override(attribute, attrtype, values)
  raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)

  case attrtype
    when String, Symbol
      begin
        attrtype = self.class.const_get("SPH_ATTR_#{attrtype.to_s.upcase}")
      rescue NameError
        raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid"
      end
    when Fixnum
      raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid" unless (SPH_ATTR_INTEGER..SPH_ATTR_BIGINT).include?(attrtype)
    else
      raise ArgumentError, '"attrtype" argument must be Fixnum, String, or Symbol'
  end

  raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)

  values.each do |id, value|
    raise ArgumentError, '"values" argument must be Hash map of Integer to Integer or Time' unless id.kind_of?(Integer)
    case attrtype
      when SPH_ATTR_TIMESTAMP
        raise ArgumentError, '"values" argument must be Hash map of Integer to Numeric' unless value.kind_of?(Integer) or value.kind_of?(Time)
      when SPH_ATTR_FLOAT
        raise ArgumentError, '"values" argument must be Hash map of Integer to Numeric' unless value.kind_of?(Numeric)
      else
        # SPH_ATTR_INTEGER, SPH_ATTR_ORDINAL, SPH_ATTR_BOOL, SPH_ATTR_BIGINT
        raise ArgumentError, '"values" argument must be Hash map of Integer to Integer' unless value.kind_of?(Integer)
    end
  end

  @overrides << { 'attr' => attribute.to_s, 'type' => attrtype, 'values' => values }
  self
end

- (Sphinx::Client) set_query_flag(flag_name, flag_value) Also known as: SetQueryFlag

Allows controlling a number of per-query options.

Supported options and respectively allowed values are:

  • reverse_scan -- 0 or 1, lets you control the order in which full-scan query processes the rows.

  • sort_method -- "pq" (priority queue, set by default) or "kbuffer" (gives faster sorting for already pre-sorted data, e.g. index data sorted by id). The result set is in both cases the same; picking one option or the other may just improve (or worsen!) performance.

  • boolean_simplify -- false or true, enables simplifying the query to speed it up.

  • idf -- either "normalized" (default) or "plain".
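
Examples (a sketch, assuming flag names are passed as symbols, as elsewhere in this API):

sphinx.set_query_flag(:boolean_simplify, true)
sphinx.set_query_flag(:sort_method, 'kbuffer')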

Parameters:

  • flag_name (String)

    name of the option to set value for

  • flag_value (Object)

    value to set

Returns:

Raises:

  • (ArgumentError)

See Also:



# File 'lib/sphinx/client.rb', line 639

def set_query_flag(flag_name, flag_value)
  raise ArgumentError, 'unknown "flag_name" argument value' unless QUERY_FLAGS.has_key?(flag_name)

  flag   = QUERY_FLAGS[flag_name]
  values = QUERY_FLAGS[flag_name][:values]

  if flag_name.to_s == 'max_predicted_time'
    raise ArgumentError, "\"flag_value\" should be a positive integer for \"max_predicted_time\" flag" unless flag_value.kind_of?(Integer) and flag_value >= 0

    @predictedtime = flag_value
  elsif !values.include?(flag_value)
    raise ArgumentError, "unknown \"flag_value\", should be one of #{values.inspect}"
  end

  is_set = values.respond_to?(:call) ? values.call(flag_value) : values.index(flag_value) == 1
  @query_flags = set_bit(@query_flags, flag[:index], is_set)
  self
end

- (Sphinx::Client) set_ranking_mode(ranker, rankexpr = '') Also known as: SetRankingMode

Sets ranking mode. Only available in SPH_MATCH_EXTENDED2 matching mode at the time of this writing. Parameter must be a constant specifying one of the known modes.

By default, in the EXTENDED matching mode Sphinx computes two factors which contribute to the final match weight. The major part is a phrase proximity value between the document text and the query. The minor part is the so-called BM25 statistical function, which varies from 0 to 1 depending on the keyword frequency within the document (more occurrences yield higher weight) and within the whole index (rarer keywords yield higher weight).

However, in some cases you might want to compute weight differently, or maybe avoid computing it at all for performance reasons because you're sorting the result set by something else anyway. This can be accomplished by setting the appropriate ranking mode.

You can specify ranking mode as String ("proximity_bm25", "bm25", etc), Symbol (:proximity_bm25, :bm25, etc), or Fixnum constant (SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, etc).

Examples:

sphinx.set_ranking_mode(Sphinx::SPH_RANK_BM25)
sphinx.set_ranking_mode(:bm25)
sphinx.set_ranking_mode('bm25')
sphinx.set_ranking_mode(:expr, 'sum(lcs*user_weight)*1000+bm25')

Parameters:

  • ranker (Integer, String, Symbol)

    ranking mode.

  • rankexpr (String) (defaults to: '')

    ranking formula to use with the expression based ranker (SPH_RANK_EXPR).

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 755

def set_ranking_mode(ranker, rankexpr = '')
  case ranker
    when String, Symbol
      const_name = "SPH_RANK_#{ranker.to_s.upcase}"
      unless self.class.const_defined?(const_name)
        raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid"
      end

      ranker = self.class.const_get(const_name)
    when Fixnum
      raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid" unless (SPH_RANK_PROXIMITY_BM25..SPH_RANK_SPH04).include?(ranker)
    else
      raise ArgumentError, '"ranker" argument must be Fixnum, String, or Symbol'
  end

  raise ArgumentError, '"rankexpr" argument must be String' unless rankexpr.kind_of?(String)
  raise ArgumentError, '"rankexpr" should not be empty if ranker is SPH_RANK_EXPR' if ranker == SPH_RANK_EXPR and rankexpr.empty?

  @ranker = ranker
  @rankexpr = rankexpr
  self
end

- (Sphinx::Client) set_request_timeout(timeout, retries = 1) Also known as: SetRequestTimeout

Sets the time allowed for performing a request to the server before giving up, and the number of retries to perform.

In the event of a request failure, an appropriate error code is returned to the application so that application-level error handling can advise the user.

When multiple servers are configured through the #set_servers method and the retries number is greater than 1, the library will make another attempt with this server (with a full reconnect). If the connection fails, behavior depends on the #set_connect_timeout settings.

Please note that this timeout is only used for performing the request, not for establishing the connection.

Examples:

Set request timeout to 1 second and number of retries to 5

sphinx.set_request_timeout(1, 5)

Parameters:

  • timeout (Integer)

    a request timeout in seconds.

  • retries (Integer) (defaults to: 1)

    number of request retries.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 371

def set_request_timeout(timeout, retries = 1)
  raise ArgumentError, '"timeout" argument must be Integer'        unless timeout.kind_of?(Integer)
  raise ArgumentError, '"retries" argument must be Integer'        unless retries.kind_of?(Integer)
  raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0

  @reqtimeout = timeout
  @reqretries = retries
  self
end

- (Sphinx::Client) set_retries(count, delay = 0) Also known as: SetRetries

Sets distributed retry count and delay.

On temporary failures searchd will attempt up to count retries per agent. delay is the delay between the retries, in milliseconds. Retries are disabled by default. Note that this call will not make the API itself retry on temporary failure; it only tells searchd to do so. Currently, the list of temporary failures includes all kinds of connection failures and maxed out (too busy) remote agents.

Examples:

Perform 5 retries with 200 ms between them

sphinx.set_retries(5, 200)

Parameters:

  • count (Integer)

    a number of retries to perform.

  • delay (Integer) (defaults to: 0)

    a delay between the retries.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 402

def set_retries(count, delay = 0)
  raise ArgumentError, '"count" argument must be Integer' unless count.kind_of?(Integer)
  raise ArgumentError, '"delay" argument must be Integer' unless delay.kind_of?(Integer)

  @retrycount = count
  @retrydelay = delay
  self
end

- (Sphinx::Client) set_select(select) Also known as: SetSelect

Sets the select clause, listing specific attributes to fetch, and expressions to compute and fetch. Clause syntax mimics SQL.

#set_select is very similar to the part of a typical SQL query between SELECT and FROM. It lets you choose what attributes (columns) to fetch, and also what expressions over the columns to compute and fetch. A certain difference from SQL is that expressions must always be aliased to a correct identifier (consisting of letters and digits) using the AS keyword. SQL also lets you do that but does not require it. Sphinx enforces aliases so that the computation results can always be returned under a "normal" name in the result set, used in other clauses, etc.

Everything else is basically identical to SQL. Star ('*') is supported. Functions are supported. An arbitrary number of expressions is supported. Computed expressions can be used for sorting, filtering, and grouping, just like regular attributes.

Starting with version 0.9.9-rc2, aggregate functions (AVG(), MIN(), MAX(), SUM()) are supported when using GROUP BY.

Expression sorting (Section 4.5, "SPH_SORT_EXPR mode") and geodistance functions (#set_geo_anchor) are now internally implemented using this computed expressions mechanism, using the magic names '@expr' and '@geodist' respectively.

Examples:

sphinx.set_select('*, @weight+(user_karma+ln(pageviews))*0.1 AS myweight')
sphinx.set_select('exp_years, salary_gbp*{$gbp_usd_rate} AS salary_usd, IF(age>40,1,0) AS over40')
sphinx.set_select('*, AVG(price) AS avgprice')

Parameters:

  • select (String)

    a select clause, listing specific attributes to fetch.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 612

def set_select(select)
  raise ArgumentError, '"select" argument must be String' unless select.kind_of?(String)

  @select = select
  self
end

- (Sphinx::Client) set_server(host, port = 9312) Also known as: SetServer

Sets searchd host name and TCP port. All subsequent requests will use the new host and port settings. Default host and port are 'localhost' and 9312, respectively.

Also, you can specify an absolute path to Sphinx's UNIX socket as host; in this case pass port as 0 or nil.

Examples:

sphinx.set_server('localhost', 9312)
sphinx.set_server('/opt/sphinx/var/run/sphinx.sock')

Parameters:

  • host (String)

    the searchd host name or UNIX socket absolute path.

  • port (Integer) (defaults to: 9312)

    the searchd port number (can be any value if a UNIX socket path is specified).

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 225

def set_server(host, port = 9312)
  raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)

  path = nil
  # Check if UNIX socket should be used
  if host[0] == ?/
    path = host
  elsif host[0, 7] == 'unix://'
    path = host[7..-1]
  else
    raise ArgumentError, '"port" argument must be Integer' unless port.kind_of?(Integer)
  end

  host = port = nil unless path.nil?

  @servers = [Sphinx::Server.new(self, host, port, path)].freeze
  logger.info { "[sphinx] servers now: #{@servers.inspect}" } if logger
  self
end

- (Sphinx::Client) set_servers(servers) Also known as: SetServers

Sets the list of searchd servers. Each subsequent request will use the next server in the list (round-robin). In case one server fails, the request can be retried on another server (see #set_connect_timeout and #set_request_timeout).

The method accepts an Array of Hashes; each of them should have :host and :port (to connect to searchd over the network) or :path (an absolute path to a UNIX socket) specified.

Examples:

sphinx.set_servers([
  { :host => 'browse01.local' }, # default port is 9312
  { :host => 'browse02.local', :port => 9312 },
  { :path => '/opt/sphinx/var/run/sphinx.sock' }
])

Parameters:

  • servers (Array<Hash>)

    an Array of Hash objects with servers parameters.

Options Hash (servers):

  • :host (String)

    the searchd host name or UNIX socket absolute path.

  • :path (String)

    the searchd UNIX socket absolute path.

  • :port (Integer) — default: 9312

    the searchd port number (skipped when a UNIX socket path is specified)

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 274

def set_servers(servers)
  raise ArgumentError, '"servers" argument must be Array'     unless servers.kind_of?(Array)
  raise ArgumentError, '"servers" argument must be not empty' if servers.empty?

  @servers = servers.map do |server|
    raise ArgumentError, '"servers" argument must be Array of Hashes' unless server.kind_of?(Hash)

    server = server.with_indifferent_access

    host = server[:path] || server[:host]
    port = server[:port] || 9312
    path = nil
    raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)

    # Check if UNIX socket should be used
    if host[0] == ?/
      path = host
    elsif host[0, 7] == 'unix://'
      path = host[7..-1]
    else
      raise ArgumentError, '"port" argument must be Integer' unless port.kind_of?(Integer)
    end

    host = port = nil unless path.nil?

    Sphinx::Server.new(self, host, port, path)
  end.freeze
  logger.info { "[sphinx] servers now: #{@servers.inspect}" } if logger
  self
end

- (Sphinx::Client) set_sort_mode(mode, sortby = '') Also known as: SetSortMode

Sets matches sorting mode.

You can specify sorting mode as String ("relevance", "attr_desc", etc), Symbol (:relevance, :attr_desc, etc), or Fixnum constant (SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, etc).

Examples:

sphinx.set_sort_mode(Sphinx::SPH_SORT_ATTR_ASC, 'attr')
sphinx.set_sort_mode(:attr_asc, 'attr')
sphinx.set_sort_mode('attr_asc', 'attr')
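
A sketch of the extended mode with a custom sorting clause (assuming a hypothetical "published_at" attribute):

sphinx.set_sort_mode(:extended, '@weight DESC, published_at DESC')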

Parameters:

  • mode (Integer, String, Symbol)

    matches sorting mode.

  • sortby (String) (defaults to: '')

    sorting clause, with the syntax depending on specific mode. Should be specified unless sorting mode is SPH_SORT_RELEVANCE.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 801

def set_sort_mode(mode, sortby = '')
  case mode
    when String, Symbol
      begin
        mode = self.class.const_get("SPH_SORT_#{mode.to_s.upcase}")
      rescue NameError
        raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
      end
    when Fixnum
      raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_SORT_RELEVANCE..SPH_SORT_EXPR).include?(mode)
    else
      raise ArgumentError, '"mode" argument must be Fixnum, String, or Symbol'
  end

  raise ArgumentError, '"sortby" argument must be String' unless sortby.kind_of?(String)
  raise ArgumentError, '"sortby" should not be empty unless mode is SPH_SORT_RELEVANCE' unless mode == SPH_SORT_RELEVANCE or !sortby.empty?

  @sort = mode
  @sortby = sortby
  self
end

- (Sphinx::Client) set_weights(weights) Also known as: SetWeights

Deprecated.

Use #set_field_weights instead.

Binds per-field weights in the order of appearance in the index.

Examples:

sphinx.set_weights([1, 3, 5])

Parameters:

  • weights (Array<Integer>)

    an Array of integer per-field weights.

Returns:

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 837

def set_weights(weights)
  raise ArgumentError, '"weights" argument must be Array' unless weights.kind_of?(Array)
  weights.each do |weight|
    raise ArgumentError, '"weights" argument must be Array of integers' unless weight.kind_of?(Integer)
  end

  @weights = weights
  self
end

- (Array<Array>, Array<Hash>) status Also known as: Status

Queries searchd status, and returns an array of status variable name and value pairs.

Examples:

Single server

status = sphinx.status
puts status.map { |key, value| "#{key.rjust(20)}: #{value}" }

Multiple servers

sphinx.set_servers([
  { :host => 'localhost' },
  { :host => 'browse02.local' }
])
sphinx.status.each do |report|
  puts "=== #{report[:server]}"
  if report[:error]
    puts "Error: #{report[:error]}"
  else
    puts report[:status].map { |key, value| "#{key.rjust(20)}: #{value}" }
  end
end

Returns:

  • (Array<Array>, Array<Hash>)

    a table containing searchd status information. If more than one server is configured (#set_servers), an Array of Hashes will be returned, one for each server. Each Hash will contain a :server element with the string name of the server (host:port) and a :status table just like the one for a single server. In case of any error, it will be stored in the :error key.



# File 'lib/sphinx/client.rb', line 2156

def status
  request = Request.new
  request.put_int(1)

  # parse response
  results = @servers.map do |server|
    begin
      response = perform_request(:status, request, nil, server)
      rows, cols = response.get_ints(2)
      status = (0...rows).map do
        (0...cols).map { response.get_string }
      end
      HashWithIndifferentAccess.new(:server => server.to_s, :status => status)
    rescue SphinxError
      # Re-raise error when a single server configured
      raise if @servers.size == 1
      HashWithIndifferentAccess.new(:server => server.to_s, :error => self.last_error)
    end
  end

  @servers.size > 1 ? results : results.first[:status]
end

- (Integer) update_attributes(index, attrs, values, mva = false, ignore_non_existent = false) Also known as: UpdateAttributes

Instantly updates given attribute values in given documents. Returns number of actually updated documents (0 or more) on success, or -1 on failure.

index is the name of the index (or indexes) to be updated. attrs is a plain array of string attribute names, listing the attributes to be updated. values is a Hash where the key is a document ID, and the value is a plain array of new attribute values.

index can be either a single index name or a list, like in #query. Unlike #query, wildcards are not allowed and all the indexes to update must be specified explicitly. The list of indexes can include distributed index names. Updates on distributed indexes will be pushed to all agents.

The updates only work with the docinfo=extern storage strategy. They are very fast because they work fully in RAM, but they can also be made persistent: updates are saved on disk on a clean searchd shutdown initiated by a SIGTERM signal. With additional restrictions, updates are also possible on MVA attributes; refer to the mva_updates_pool directive for details.

The first example statement below will update document 1 in index "test1", setting "group_id" to 456. The second one will update documents 1001, 1002 and 1003 in index "products". For document 1001, the new price will be set to 123 and the new amount in stock to 5; for document 1002, the new price will be 37 and the new amount will be 11; etc. The third one updates document 1 in index "test2", setting MVA attribute "group_id" to [456, 789].

Examples:

sphinx.update_attributes("test1", ["group_id"], { 1 => [456] });
sphinx.update_attributes("products", ["price", "amount_in_stock"],
  { 1001 => [123, 5], 1002 => [37, 11], 1003 => [25, 129] });
sphinx.update_attributes('test2', ['group_id'], { 1 => [[456, 789]] }, true)
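
A sketch of checking the documented return value, where -1 indicates a failure:

updated = sphinx.update_attributes('products', ['price'], { 1001 => [99] })
if updated < 0
  puts "update failed: #{sphinx.last_error}"
else
  puts "updated #{updated} document(s)"
end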

Parameters:

  • index (String)

    a name of the index to be updated.

  • attrs (Array<String>)

    an array of attribute name strings.

  • values (Hash)

    is a hash where key is document id, and value is an array of new attribute values.

  • mva (Boolean) (defaults to: false)

    indicating whether MVA attributes are being updated.

  • ignore_non_existent (Boolean) (defaults to: false)

    whether updates to non-existent attributes should be ignored.

Returns:

  • (Integer)

    number of actually updated documents (0 or more) on success, -1 on failure.

Raises:

  • (ArgumentError)

    Occurred when parameters are invalid.

See Also:



# File 'lib/sphinx/client.rb', line 2052

def update_attributes(index, attrs, values, mva = false, ignore_non_existent = false)
  # verify everything
  raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
  raise ArgumentError, '"mva" argument must be Boolean'  unless mva.kind_of?(TrueClass) or mva.kind_of?(FalseClass)
  raise ArgumentError, '"ignore_non_existent" argument must be Boolean'  unless ignore_non_existent.kind_of?(TrueClass) or ignore_non_existent.kind_of?(FalseClass)

  raise ArgumentError, '"attrs" argument must be Array' unless attrs.kind_of?(Array)
  attrs.each do |attr|
    raise ArgumentError, '"attrs" argument must be Array of Strings' unless attr.kind_of?(String) or attr.kind_of?(Symbol)
  end

  raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)
  values.each do |id, entry|
    raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless id.kind_of?(Integer)
    raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless entry.kind_of?(Array)
    raise ArgumentError, "\"values\" argument Hash values Array must have #{attrs.length} elements" unless entry.length == attrs.length
    entry.each do |v|
      if mva
        raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays' unless v.kind_of?(Array)
        v.each do |vv|
          raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays of Integers' unless vv.kind_of?(Integer)
        end
      else
        raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Integers' unless v.kind_of?(Integer)
      end
    end
  end

  # build request
  request = Request.new
  request.put_string index

  request.put_int attrs.length
  request.put_int ignore_non_existent ? 1 : 0
  for attr in attrs
    request.put_string attr
    request.put_int mva ? 1 : 0
  end

  request.put_int values.length
  values.each do |id, entry|
    request.put_int64 id
    if mva
      entry.each { |v| request.put_int_array v }
    else
      request.put_int(*entry)
    end
  end

  response = perform_request(:update, request)

  # parse response
  response.get_int
end

- (Object) with_server(server_index) (protected) - (Object) with_server(server) (protected)

This is an internal method which selects the next server (round-robin) and yields it to the passed block.

In case of a connection error, it will try the next server several times (see the #set_connect_timeout method for details). If all servers are down, it will set the error attribute (which can be retrieved with the #last_error method) to the last exception message, and the #connect_error? method will return true. Also, a SphinxConnectError exception will be raised.

Overloads:

  • - (Object) with_server(server_index)

    Get the server based on some seed value (usually the CRC32 of the request). In this case the initial server will be chosen using this seed value; in case of a connection failure, the next server in the servers list will be used.

    Parameters:

    • server_index (Integer)

      server index; can be any integer value (not necessarily less than the number of servers).

    • attempts (Integer)

      how many retries to perform. Use nil to perform retries configured with #set_connect_timeout.

  • - (Object) with_server(server)

    Get the server specified as a parameter. If specified, the request will be performed on the specified server, and the library will try to establish a connection to this server only once.

    Parameters:

    • server (Server)

      server to perform request on.

    • attempts (Integer)

      how many retries to perform. Use nil to perform retries configured with #set_connect_timeout.

Yields:

  • a block which performs request on a given server.

Yield Parameters:

  • server (Sphinx::Server)

    contains information about the server to perform request on.

Raises:



# File 'lib/sphinx/client.rb', line 2447

def with_server(server = nil, attempts = nil)
  case server
    when Server
      idx = @servers.index(server) || 0
      s = server
    when Integer
      idx = server % @servers.size
      s = @servers[idx]
    when NilClass
      idx = 0
      s = @servers[idx]
    else
      raise ArgumentError, 'server argument must be Integer or Sphinx::Server'
  end
  attempts ||= @retries
  begin
    yield s
  rescue SphinxConnectError => e
    logger.warn { "[sphinx] server failed: #{e.class.name}: #{e.message}" } if logger
    # Connection error! Do we need to try it again?
    attempts -= 1
    if attempts > 0
      logger.info { "[sphinx] connection to server #{s.inspect} DIED! Retrying operation..." } if logger
      # Get the next server
      idx = (idx + 1) % @servers.size
      s = @servers[idx]
      retry
    end

    # Re-raise original exception
    @error = e.message
    @connerror = true
    raise
  end
end

- (Object) with_socket(server) {|socket| ... } (protected)

This is an internal method which retrieves a socket for a given server, initiates a Sphinx session, and yields this socket to the passed block.

In case of any problems with session initiation, SphinxConnectError will be raised, because this is part of establishing the connection. See the #with_server method details for more information about how this exception is handled.

The socket retrieving routine is wrapped in a block with its own timeout value (see #set_connect_timeout). This is done in the Server#get_socket method, so check it for details.

Request execution is wrapped in a block with another timeout (see #set_request_timeout). This ensures no Sphinx request will take an unreasonable amount of time.

In case of any Sphinx error (incomplete reply, internal or temporary error), the connection to the server will be re-established, and the request will be retried (see #set_request_timeout). Of course, if the connection cannot be established, the next server will be selected (see the explanation above).

Parameters:

  • server (Sphinx::Server)

    contains information about the server to perform request on.

Yields:

  • a block which will actually perform the request.

Yield Parameters:

  • socket (Sphinx::BufferedIO)

    a socket used to perform the request.

Raises:



# File 'lib/sphinx/client.rb', line 2515

def with_socket(server)
  attempts = @reqretries
  socket = nil

  begin
    s = server.get_socket do |sock|
      # Remember socket to close it in case of emergency
      socket = sock

      # send my version
      # this is a subtle part. we must do it before (!) reading back from searchd.
      # because otherwise under some conditions (reported on FreeBSD for instance)
      # TCP stack could throttle write-write-read pattern because of Nagle.
      sock.write([1].pack('N'))
      v = sock.read(4).unpack('N*').first

      # Ouch, invalid protocol!
      if v < 1
        raise SphinxConnectError, "expected searchd protocol version 1+, got version '#{v}'"
      end
    end

    Sphinx::safe_execute(@reqtimeout) do
      yield s
    end
  rescue SocketError, SystemCallError, IOError, ::Errno::EPIPE => e
    logger.warn { "[sphinx] socket failure: #{e.message}" } if logger
    # Ouch, communication problem, will be treated as a connection problem.
    raise SphinxConnectError, "failed to read searchd response (msg=#{e.message})"
  rescue SphinxResponseError, SphinxInternalError, SphinxTemporaryError, SphinxUnknownError, ::Timeout::Error, EOFError => e
    # EOFError should not occur in ideal world, because we compare response length
    # with a value passed by Sphinx. But we want to ensure that client will not
    # fail with unexpected error when Sphinx implementation has bugs, aren't we?
    if e.kind_of?(EOFError) or e.kind_of?(::Timeout::Error)
      new_e = SphinxResponseError.new("failed to read searchd response (msg=#{e.message})")
      new_e.set_backtrace(e.backtrace)
      e = new_e
    end
    logger.warn { "[sphinx] generic failure: #{e.class.name}: #{e.message}" } if logger

    # Close previously opened socket (in case of it has been really opened)
    server.free_socket(socket)

    # Request error! Do we need to try it again?
    attempts -= 1
    retry if attempts > 0

    # Re-raise original exception
    @error = e.message
    raise e
  ensure
    # Close previously opened socket on any other error
    server.free_socket(socket)
  end
end