Skip to content

Commit

Permalink
Merge pull request #14 from mhalle/boolean-expand
Browse files Browse the repository at this point in the history
changed implementation of boolean operation to allow AND and expand:true
  • Loading branch information
weixsong committed Apr 14, 2016
2 parents 6c3edd1 + 143294a commit b20a10a
Showing 1 changed file with 83 additions and 33 deletions.
116 changes: 83 additions & 33 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,14 @@ elasticlunr.Index.prototype.search = function (query, userConfig) {
return results;
};

/**
* search queryTokens in specified field.
*
* @param {Array} queryTokens The query tokens to query in this field.
* @param {String} field Field to query in.
* @param {elasticlunr.Configuration} config The user query config, JSON format.
* @return {Object}
*/
/**
* search queryTokens in specified field.
*
Expand All @@ -371,19 +379,49 @@ elasticlunr.Index.prototype.search = function (query, userConfig) {
elasticlunr.Index.prototype.fieldSearch = function (queryTokens, fieldName, config) {
var booleanType = config[fieldName].bool;
var expand = config[fieldName].expand;
var scores = {};
var scores = null;
var docTokens = {};

queryTokens.forEach(function (token) {
var tokens = [token];
if (expand == true) {
tokens = this.index[fieldName].expandToken(token);
}

// Consider every query token in turn. If expanded, each query token
// corresponds to a set of tokens, which is all tokens in the
// index matching the pattern queryToken* .
// For the set of tokens corresponding to a query token, find and score
// all matching documents. Store those scores in queryTokenScores,
// keyed by docRef.
// Then, depending on the value of booleanType, combine the scores
// for this query token with previous scores. If booleanType is OR,
// then merge the scores by summing into the accumulated total, adding
// new document scores as required (effectively a union operation).
// If booleanType is AND, accumulate scores only if the document
// has previously been scored by another query token (an intersection
// operation).
// Furthermore, since when booleanType is AND, additional
// query tokens can't add new documents to the result set, use the
// current document set to limit the processing of each new query
// token for efficiency (i.e., incremental intersection).

var queryTokenScores = {};
tokens.forEach(function (key) {
var docs = this.index[fieldName].getDocs(key);
var idf = this.idf(key, fieldName);


if (scores && booleanType == 'AND') {
// Special case: we can rule out documents that have
// already been filtered out because they weren't scored
// by previous query token passes.
var filteredDocs = {};
for (var docRef in scores) {
if (docRef in docs) {
filteredDocs[docRef] = docs[docRef];
}
}
docs = filteredDocs;
}
// Only record the matched token for retrieved documents for the
// original token, not for expanded tokens,
// because when doing coordNorm for a retrieved document, coordNorm only cares how many
Expand Down Expand Up @@ -411,24 +449,58 @@ elasticlunr.Index.prototype.fieldSearch = function (queryTokens, fieldName, conf

var score = tf * idf * fieldLengthNorm * penality;

if (docRef in scores) {
scores[docRef] += score;
if (docRef in queryTokenScores) {
queryTokenScores[docRef] += score;
} else {
scores[docRef] = score;
queryTokenScores[docRef] = score;
}
}
}, this);

scores = this.mergeScores(scores, queryTokenScores, booleanType);
}, this);

if (booleanType == 'AND') {
scores = this.intersect(scores, docTokens, queryTokens.length);
}

scores = this.coordNorm(scores, docTokens, queryTokens.length);

return scores;
};

/**
 * Merge the scores from one set of tokens into an accumulated score table.
 * Exact operation depends on the op parameter. If op is 'AND', then only the
 * intersection of the two score tables is retained (as a new object).
 * Otherwise, the union of the two score tables is returned; in that case
 * accumScores is updated in place and returned. For internal use only.
 *
 * @param {Object} accumScores Accumulated scores keyed by docRef. Should be
 *   null on the first call, in which case `scores` is returned as-is.
 * @param {Object} scores New scores (keyed by docRef) to merge into accumScores.
 * @param {String} op Merge operation (should be 'AND' or 'OR'); any value
 *   other than 'AND' is treated as a union.
 * @return {Object} The merged score table.
 */
elasticlunr.Index.prototype.mergeScores = function (accumScores, scores, op) {
  // Membership must be tested on own properties only: the `in` operator also
  // matches keys inherited from Object.prototype, so a docRef such as
  // "constructor" or "toString" would wrongly look "already scored".
  var hasOwn = Object.prototype.hasOwnProperty;
  var docRef;

  if (!accumScores) {
    return scores;
  }

  if (op === 'AND') {
    var intersection = {};
    for (docRef in scores) {
      if (hasOwn.call(accumScores, docRef)) {
        intersection[docRef] = accumScores[docRef] + scores[docRef];
      }
    }
    return intersection;
  }

  for (docRef in scores) {
    if (hasOwn.call(accumScores, docRef)) {
      accumScores[docRef] += scores[docRef];
    } else {
      accumScores[docRef] = scores[docRef];
    }
  }
  return accumScores;
};


/**
* Record the occurring query token of the retrieved doc specified by doc field.
* Only for internal use.
Expand All @@ -448,28 +520,6 @@ elasticlunr.Index.prototype.fieldSearchStats = function (docTokens, token, docs)
}
};

/**
 * Find the documents that contain all of the query tokens.
 * Only for internal use.
 *
 * @param {Object} scores Scores keyed by document ref.
 * @param {Object} docTokens Per-document entries keyed by document ref; each
 *   entry's `length` is compared against n (presumably the list of query
 *   tokens recorded for that document -- verify against fieldSearchStats).
 * @param {Integer} n Number of query tokens.
 * @return {Object} A new object holding only the scores of documents whose
 *   docTokens entry has exactly n items.
 */
elasticlunr.Index.prototype.intersect = function (scores, docTokens, n) {
  // Use an own-property check: `doc in docTokens` would also match keys
  // inherited from Object.prototype (e.g. a document ref of "constructor"),
  // whose inherited value then gets a bogus `length` comparison.
  var hasOwn = Object.prototype.hasOwnProperty;
  var res = {};

  for (var doc in scores) {
    if (!hasOwn.call(docTokens, doc)) {
      continue;
    }
    if (docTokens[doc].length === n) {
      res[doc] = scores[doc];
    }
  }

  return res;
};

/**
* coord norm the score of a doc.
* if a doc contain more query tokens, then the score will larger than the doc
Expand Down

0 comments on commit b20a10a

Please sign in to comment.