codeintel: Add additional canonicalization transformations (#45175)

This commit is contained in:
Eric Fritz 2022-12-05 11:58:02 -06:00 committed by GitHub
parent bfba204ffd
commit 83a87448e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 172 additions and 34 deletions

View File

@ -1,75 +1,65 @@
package types
import (
"sort"
"github.com/sourcegraph/scip/bindings/go/scip"
)
import "github.com/sourcegraph/scip/bindings/go/scip"
// CanonicalizeDocument canonicalizes the occurrences and symbols of the given document.
// The document is modified in-place and returned for convenience.
func CanonicalizeDocument(document *scip.Document) *scip.Document {
	// The canonicalized slices must be assigned back: canonicalization flattens
	// duplicates and therefore may return a different (shorter) slice than it was given.
	document.Occurrences = CanonicalizeOccurrences(document.Occurrences)
	document.Symbols = CanonicalizeSymbols(document.Symbols)
	return document
}
// CanonicalizeOccurrences deterministically re-orders the fields of the given occurrence slice.
// The input is modified in-place but returned for convenience.
func CanonicalizeOccurrences(occurrences []*scip.Occurrence) []*scip.Occurrence {
for _, occurrence := range occurrences {
_ = CanonicalizeOccurrence(occurrence)
canonicalized := make([]*scip.Occurrence, 0, len(occurrences))
for _, occurrence := range FlattenOccurrences(occurrences) {
canonicalized = append(canonicalized, CanonicalizeOccurrence(occurrence))
}
return SortOccurrences(occurrences)
return SortOccurrences(canonicalized)
}
// CanonicalizeOccurrence normalizes the range encoding of the given occurrence and
// canonicalizes its diagnostics. The input is modified in-place but returned for convenience.
func CanonicalizeOccurrence(occurrence *scip.Occurrence) *scip.Occurrence {
	// Express ranges as three-components if possible
	occurrence.Range = scip.NewRange(occurrence.Range).SCIPRange()

	// Assign the result back: the canonicalized diagnostics are returned as a new slice.
	occurrence.Diagnostics = CanonicalizeDiagnostics(occurrence.Diagnostics)
	return occurrence
}
// CanonicalizeDiagnostics canonicalizes each of the given diagnostics and returns them in
// sorted order. The diagnostic values are modified in-place; the returned slice is newly
// allocated, so the order of the input slice itself is left untouched.
func CanonicalizeDiagnostics(diagnostics []*scip.Diagnostic) []*scip.Diagnostic {
	canonicalized := make([]*scip.Diagnostic, 0, len(diagnostics))
	for _, diagnostic := range diagnostics {
		canonicalized = append(canonicalized, CanonicalizeDiagnostic(diagnostic))
	}

	return SortDiagnostics(canonicalized)
}
// CanonicalizeDiagnostic sorts the tags of the given diagnostic. The input is modified
// in-place but returned for convenience.
func CanonicalizeDiagnostic(diagnostic *scip.Diagnostic) *scip.Diagnostic {
	// Delegate to the shared helper instead of sorting inline so that tag ordering
	// is defined in exactly one place.
	diagnostic.Tags = SortDiagnosticTags(diagnostic.Tags)
	return diagnostic
}
// CanonicalizeSymbols deterministically re-orders the fields of the given symbols slice.
// The input is modified in-place but returned for convenience.
func CanonicalizeSymbols(symbols []*scip.SymbolInformation) []*scip.SymbolInformation {
for _, symbol := range symbols {
_ = CanonicalizeSymbol(symbol)
canonicalized := make([]*scip.SymbolInformation, 0, len(symbols))
for _, symbol := range FlattenSymbols(symbols) {
canonicalized = append(canonicalized, CanonicalizeSymbol(symbol))
}
return SortSymbols(symbols)
return SortSymbols(canonicalized)
}
// CanonicalizeSymbol canonicalizes the relationships of the given symbol. The input is
// modified in-place but returned for convenience.
func CanonicalizeSymbol(symbol *scip.SymbolInformation) *scip.SymbolInformation {
	// CanonicalizeRelationships both merges duplicates and sorts, so no separate
	// inline sort is needed here.
	symbol.Relationships = CanonicalizeRelationships(symbol.Relationships)
	return symbol
}
// CanonicalizeRelationships flattens the given relationships and returns the flattened
// relationships in sorted order.
func CanonicalizeRelationships(relationships []*scip.Relationship) []*scip.Relationship {
	flattened := FlattenRelationship(relationships)
	return SortRelationships(flattened)
}

View File

@ -0,0 +1,107 @@
package types
import "github.com/sourcegraph/scip/bindings/go/scip"
// FlattenDocuments merges elements of the given slice with the same relative path. This allows us to make
// the assumption post-canonicalization that each index has one representation of a given document path in
// the database.
//
// This function returns a new slice containing one document per distinct relative path, in the order
// in which each path first appears in the input. The first document seen for a path is modified in-place:
// the symbols and occurrences of later documents with the same path are appended to it.
func FlattenDocuments(documents []*scip.Document) []*scip.Document {
	documentMap := make(map[string]*scip.Document, len(documents))
	flattened := make([]*scip.Document, 0, len(documents))

	for _, document := range documents {
		existing, ok := documentMap[document.RelativePath]
		if !ok {
			// Record output order here rather than ranging over the map afterwards,
			// as map iteration order is randomized.
			documentMap[document.RelativePath] = document
			flattened = append(flattened, document)
			continue
		}

		// TODO - warn if existing.Language != document.Language?
		existing.Symbols = append(existing.Symbols, document.Symbols...)
		existing.Occurrences = append(existing.Occurrences, document.Occurrences...)
	}

	return flattened
}
// FlattenSymbols merges elements of the given slice with the same symbol name. This allows us to make the
// assumption post-canonicalization that each index and document refer to one symbol metadata object uniquely.
//
// This function returns a new slice containing one element per distinct symbol name, in the order in
// which each name first appears in the input. The first element seen for a name is modified in-place:
// the documentation and relationships of later elements with the same name are appended to it.
func FlattenSymbols(symbols []*scip.SymbolInformation) []*scip.SymbolInformation {
	symbolMap := make(map[string]*scip.SymbolInformation, len(symbols))
	flattened := make([]*scip.SymbolInformation, 0, len(symbols))

	for _, symbol := range symbols {
		existing, ok := symbolMap[symbol.Symbol]
		if !ok {
			// Record output order here rather than ranging over the map afterwards,
			// as map iteration order is randomized.
			symbolMap[symbol.Symbol] = symbol
			flattened = append(flattened, symbol)
			continue
		}

		existing.Documentation = append(existing.Documentation, symbol.Documentation...)
		existing.Relationships = append(existing.Relationships, symbol.Relationships...)
	}

	return flattened
}
// FlattenOccurrences merges elements of the given slice that have equivalent bounds and refer to the
// same symbol. Occurrences that share a range but name different symbols are kept as distinct elements.
//
// The input slice is sorted in-place as a side effect. An empty input is returned as-is; otherwise
// this function returns a new slice. The first occurrence of each merged group is modified in-place:
// symbol roles are OR-ed together and override documentation and diagnostics are appended.
func FlattenOccurrences(occurrences []*scip.Occurrence) []*scip.Occurrence {
	if len(occurrences) == 0 {
		return occurrences
	}

	// Sorting orders by range and then by symbol name, which places the
	// candidates for merging next to one another.
	_ = SortOccurrences(occurrences)
	flattened := make([]*scip.Occurrence, 0, len(occurrences))
	flattened = append(flattened, occurrences[0])

	for _, occurrence := range occurrences[1:] {
		top := flattened[len(flattened)-1]

		// Only merge when both the range and the symbol match; merging on range
		// alone would silently drop the occurrence of the second symbol.
		if !rawRangesEqual(top.Range, occurrence.Range) || top.Symbol != occurrence.Symbol {
			flattened = append(flattened, occurrence)
			continue
		}

		// TODO - warn if top.SyntaxKind != occurrence.SyntaxKind?
		top.SymbolRoles |= occurrence.SymbolRoles
		top.OverrideDocumentation = append(top.OverrideDocumentation, occurrence.OverrideDocumentation...)
		top.Diagnostics = append(top.Diagnostics, occurrence.Diagnostics...)
	}

	return flattened
}
// FlattenRelationship merges elements of the given slice with equivalent symbol names.
//
// This function returns a new slice containing one relationship per distinct symbol name, in the
// order in which each name first appears in the input. The first relationship seen for a name is
// modified in-place: each of its boolean flags becomes the logical OR of that flag across all
// relationships with the same symbol name.
func FlattenRelationship(relationships []*scip.Relationship) []*scip.Relationship {
	relationshipMap := make(map[string]*scip.Relationship, len(relationships))
	flattened := make([]*scip.Relationship, 0, len(relationships))

	for _, relationship := range relationships {
		combined, ok := relationshipMap[relationship.Symbol]
		if !ok {
			// Record output order here rather than ranging over the map afterwards,
			// as map iteration order is randomized.
			relationshipMap[relationship.Symbol] = relationship
			flattened = append(flattened, relationship)
			continue
		}

		combined.IsReference = combined.IsReference || relationship.IsReference
		combined.IsImplementation = combined.IsImplementation || relationship.IsImplementation
		combined.IsTypeDefinition = combined.IsTypeDefinition || relationship.IsTypeDefinition
		combined.IsDefinition = combined.IsDefinition || relationship.IsDefinition
	}

	return flattened
}

View File

@ -27,15 +27,38 @@ func FindOccurrences(occurrences []*scip.Occurrence, targetLine, targetCharacter
// SortOccurrences sorts the given occurrence slice (in-place) and returns it (for convenience).
// Occurrences sorted in ascending order of their range's starting position, where enclosing ranges
// come before the enclosed. If there are multiple occurrences with the exact same range, then the
// occurrences are sorted by symbol name.
func SortOccurrences(occurrences []*scip.Occurrence) []*scip.Occurrence {
	sort.Slice(occurrences, func(i, j int) bool {
		if rawRangesEqual(occurrences[i].Range, occurrences[j].Range) {
			return occurrences[i].Symbol < occurrences[j].Symbol
		}

		// Use a strict comparison: a less function must never report true for both
		// (i, j) and (j, i), which `<= 0` would do whenever the comparison yields 0.
		return compareRanges(occurrences[i].Range, occurrences[j].Range...) < 0
	})

	return occurrences
}
// rawRangesEqual reports whether the two SCIP-encoded raw ranges are equal. Slices of the
// same length are compared component-wise; slices of differing lengths are first converted
// via scip.NewRange and then compared by start and end position.
func rawRangesEqual(a, b []int32) bool {
	if len(a) != len(b) {
		// Differing encodings cannot be compared directly.
		left, right := scip.NewRange(a), scip.NewRange(b)

		sameStart := left.Start.Line == right.Start.Line && left.Start.Character == right.Start.Character
		sameEnd := left.End.Line == right.End.Line && left.End.Character == right.End.Character
		return sameStart && sameEnd
	}

	for idx := range a {
		if a[idx] != b[idx] {
			return false
		}
	}

	return true
}
// SortRanges sorts the given range slice (in-place) and returns it (for convenience). Ranges are
// sorted in ascending order of starting position, where enclosing ranges come before the enclosed.
func SortRanges(ranges []*scip.Range) []*scip.Range {
@ -64,7 +87,7 @@ func SortSymbols(symbols []*scip.SymbolInformation) []*scip.SymbolInformation {
}
// SortDiagnostics sorts the given diagnostics slice (in-place) and returns it (for convenience).
// Diagnostics are sorted first by severity (more severe earlier in the slice) and then by the
// diagnostic message.
func SortDiagnostics(diagnostics []*scip.Diagnostic) []*scip.Diagnostic {
sort.Slice(diagnostics, func(i, j int) bool {
@ -80,6 +103,24 @@ func SortDiagnostics(diagnostics []*scip.Diagnostic) []*scip.Diagnostic {
return diagnostics
}
// SortDiagnosticTags orders the given diagnostic tags ascending by value. The slice is
// sorted in-place and also returned for convenience.
func SortDiagnosticTags(tags []scip.DiagnosticTag) []scip.DiagnosticTag {
	ascending := func(a, b int) bool {
		return tags[b] > tags[a]
	}

	sort.Slice(tags, ascending)
	return tags
}
// SortRelationships orders the given symbol relationships ascending by symbol name. The
// slice is sorted in-place and also returned for convenience.
func SortRelationships(relationships []*scip.Relationship) []*scip.Relationship {
	bySymbolName := func(a, b int) bool {
		return relationships[b].Symbol > relationships[a].Symbol
	}

	sort.Slice(relationships, bySymbolName)
	return relationships
}
// compareRanges compares the order of the leading edge of the two ranges. This method returns
//
// - -1 if the leading edge of r2 occurs before r1,