Search: boost matches on quoted terms (#64298)

Follow-up to #64207. Under our old search semantics, quotes were
interpreted literally, so a query like `"sourcegraph"` would match only
strings like `fmt.Println("sourcegraph")`. Now, both single and double
quotes are used for escaping: they mean that the enclosed contents
should be searched for exactly.

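This PR boosts matches on the literal quoted text (quotes included) in
result ranking. For example, the pattern `'sourcegraph'` is now rewritten
to `(or "'sourcegraph'" "sourcegraph")`, where the first operand is the
boosted pattern. This way, users familiar with the old syntax are more
likely to find what they're after.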

## Test plan

Adapted the unit tests. Manually re-tested all queries from #64207, plus
these:
* `'sourcegraph'`
* `"sourcegraph"`
This commit is contained in:
Julie Tibshirani 2024-08-06 15:55:24 +03:00 committed by GitHub
parent ff52b14dd1
commit 958afb0936
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 21 additions and 19 deletions

View File

@ -1,12 +1,9 @@
package query
import (
"fmt"
"strconv"
"strings"
"github.com/grafana/regexp"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
@ -691,8 +688,12 @@ func ExperimentalPhraseBoost(originalQuery string, basic Basic) Basic {
// Check if the pattern is a single top-level AND expression with no negated or regexp clauses.
switch p := basic.Pattern.(type) {
case Pattern:
if !p.Annotation.Labels.IsSet(Quoted) || p.Negated || p.Annotation.Labels.IsSet(Regexp) {
return basic
}
case Operator:
if p.Kind != And || len(p.Operands) <= 1 {
if p.Kind != And {
return basic
}
for _, child := range p.Operands {
@ -714,14 +715,12 @@ func ExperimentalPhraseBoost(originalQuery string, basic Basic) Basic {
}
query := strings.Join(filteredTerms, " ")
pattern := fmt.Sprintf(`(^|\b)%s($|\b)`, regexp.QuoteMeta(query))
basic.Pattern = Operator{
Kind: Or,
Operands: []Node{
Pattern{
Value: pattern,
Annotation: Annotation{Labels: Boost | Regexp | Standard},
Value: query,
Annotation: Annotation{Labels: Boost | Literal | Standard},
},
basic.Pattern,
},

View File

@ -23,28 +23,31 @@ func TestExperimentalPhraseBoost(t *testing.T) {
}
// expect phrase query
autogold.Expect(`(or "(^|\\b)foo bar bas($|\\b)" (and "foo" "bar" "bas"))`).Equal(t, test("foo bar bas", SearchTypeKeyword))
autogold.Expect(`(or "(^|\\b)\\(foo and bar\\) and bas($|\\b)" (and "foo" "bar" "bas"))`).Equal(t, test("(foo and bar) and bas", SearchTypeKeyword))
autogold.Expect(`(or "(^|\\b)\\* int func\\(($|\\b)" (and "*" "int" "func("))`).Equal(t, test("* int func(", SearchTypeKeyword))
autogold.Expect(`(or "(^|\\b)\"foo bar\" bas qux($|\\b)" (and "foo bar" "bas" "qux"))`).Equal(t, test(`"foo bar" bas qux`, SearchTypeKeyword))
autogold.Expect(`(or "(^|\\b)foo 'bar bas' qux($|\\b)" (and "foo" "bar bas" "qux"))`).Equal(t, test(`foo 'bar bas' qux`, SearchTypeKeyword))
autogold.Expect(`(or "foo bar bas" (and "foo" "bar" "bas"))`).Equal(t, test("foo bar bas", SearchTypeKeyword))
autogold.Expect(`(or "(foo and bar) and bas" (and "foo" "bar" "bas"))`).Equal(t, test("(foo and bar) and bas", SearchTypeKeyword))
autogold.Expect(`(or "* int func(" (and "*" "int" "func("))`).Equal(t, test("* int func(", SearchTypeKeyword))
autogold.Expect(`(or "\"foo bar\" bas qux" (and "foo bar" "bas" "qux"))`).Equal(t, test(`"foo bar" bas qux`, SearchTypeKeyword))
autogold.Expect(`(or "foo 'bar bas' qux" (and "foo" "bar bas" "qux"))`).Equal(t, test(`foo 'bar bas' qux`, SearchTypeKeyword))
// expect no phrase query
autogold.Expect(`"foo bar bas"`).Equal(t, test("/foo bar bas/", SearchTypeKeyword))
autogold.Expect(`(and "foo" "bar" "ba.*")`).Equal(t, test("foo bar /ba.*/", SearchTypeKeyword))
autogold.Expect(`"foo"`).Equal(t, test("foo", SearchTypeKeyword))
autogold.Expect(`(or "(^|\\b)foo and bar($|\\b)" (and "foo" "bar"))`).Equal(t, test("foo and bar", SearchTypeKeyword))
autogold.Expect(`(or "foo and bar" (and "foo" "bar"))`).Equal(t, test("foo and bar", SearchTypeKeyword))
autogold.Expect(`(and "foo" (not "bar"))`).Equal(t, test("foo not bar", SearchTypeKeyword))
autogold.Expect(`(and "foo" "bar" (not "bas") "quz")`).Equal(t, test("foo bar not bas quz", SearchTypeKeyword))
autogold.Expect(`(or "foo" "bar" "bas")`).Equal(t, test("foo or bar or bas", SearchTypeKeyword))
autogold.Expect(`(or (and "foo" "bar") (and "quz" "biz"))`).Equal(t, test("foo and bar or (quz and biz)", SearchTypeKeyword))
// cases that came up in user feedback
autogold.Expect(`(and "repo:golang/go" (or "(^|\\b)// The vararg opts parameter can include functions to configure the($|\\b)" (and "//" "The" "vararg" "opts" "parameter" "can" "include" "functions" "to" "configure" "the")))`).Equal(t, test("repo:golang/go // The vararg opts parameter can include functions to configure the", SearchTypeKeyword))
autogold.Expect(`(and "context:global" (or "(^|\\b)invalid modelID;($|\\b)" (and "invalid" "modelID;")))`).Equal(t, test("context:global invalid modelID;", SearchTypeKeyword))
autogold.Expect(`(and "context:global" (or "(^|\\b)return \"various\";($|\\b)" (and "return" "\"various\";")))`).Equal(t, test("context:global return \"various\";", SearchTypeKeyword))
autogold.Expect(`(and "repo:golang/go" (or "(^|\\b)test server($|\\b)" (and "test" "server")))`).Equal(t, test("repo:golang/go test server", SearchTypeKeyword))
autogold.Expect(`(and "repo:sourcegraph/cody@main" (or "(^|\\b)the models and other($|\\b)" (and "the" "models" "other")))`).Equal(t, test("repo:sourcegraph/cody@main the models and other ", SearchTypeKeyword))
autogold.Expect(`(and "repo:golang/go" (or "// The vararg opts parameter can include functions to configure the" (and "//" "The" "vararg" "opts" "parameter" "can" "include" "functions" "to" "configure" "the")))`).Equal(t, test("repo:golang/go // The vararg opts parameter can include functions to configure the", SearchTypeKeyword))
autogold.Expect(`(and "context:global" (or "invalid modelID;" (and "invalid" "modelID;")))`).Equal(t, test("context:global invalid modelID;", SearchTypeKeyword))
autogold.Expect(`(and "context:global" (or "return \"various\";" (and "return" "\"various\";")))`).Equal(t, test("context:global return \"various\";", SearchTypeKeyword))
autogold.Expect(`(and "repo:golang/go" (or "test server" (and "test" "server")))`).Equal(t, test("repo:golang/go test server", SearchTypeKeyword))
autogold.Expect(`(and "repo:sourcegraph/cody@main" (or "the models and other" (and "the" "models" "other")))`).Equal(t, test("repo:sourcegraph/cody@main the models and other ", SearchTypeKeyword))
autogold.Expect(`(and "repo:sourcegraph/cody@main" (or "'sourcegraph'" "sourcegraph"))`).Equal(t, test("repo:sourcegraph/cody@main 'sourcegraph'", SearchTypeKeyword))
autogold.Expect(`(and "repo:sourcegraph/zoekt" (or "\"some string\"" "some string"))`).Equal(t, test("repo:sourcegraph/zoekt \"some string\"", SearchTypeKeyword))
}
func TestSubstituteAliases(t *testing.T) {