Linkify commit messages and commit bodies (#49841)

This commit is contained in:
Tom Ross 2023-03-23 09:05:40 +00:00 committed by GitHub
parent 8f87c6c49f
commit 07a8296a45
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 253 additions and 75 deletions

View File

@ -1,17 +1,7 @@
import { Link } from '@sourcegraph/wildcard'
import { ExternalServiceKind } from '../../graphql-operations'
// This regex is supposed to match in the following cases:
//
// - Create search and search-ui packages (#29773)
// - Fix #123 for xyz
//
// However it is supposed not to match in:
//
// - Something sourcegraph/other-repo#123 or so
// - 123#123
const GH_ISSUE_NUMBER_IN_COMMIT = /([^\dA-Za-z](#\d+))/g
import { Linkified } from '../linkifiy/Linkified'
interface Props {
message: string
@ -34,66 +24,5 @@ export const CommitMessageWithLinks = ({
to,
}
const github = externalURLs ? externalURLs.find(url => url.serviceKind === ExternalServiceKind.GITHUB) : null
const matches = [...message.matchAll(GH_ISSUE_NUMBER_IN_COMMIT)]
if (github && matches.length > 0) {
const url = githubRepoUrl(github.url)
let remainingMessage = message
let skippedCharacters = 0
const linkSegments: React.ReactNode[] = []
for (const match of matches) {
if (match.index === undefined) {
continue
}
const issueNumber = match[2]
const index = remainingMessage.indexOf(issueNumber, match.index - skippedCharacters)
const before = remainingMessage.slice(0, index)
linkSegments.push(
<Link key={linkSegments.length} {...commitLinkProps}>
{before}
</Link>
)
linkSegments.push(
<Link
target="blank"
rel="noreferrer noopener"
key={linkSegments.length}
to={`${url}/pull/${issueNumber.replace('#', '')}`}
>
{issueNumber}
</Link>
)
const nextIndex = index + issueNumber.length
remainingMessage = remainingMessage.slice(index + issueNumber.length)
skippedCharacters += nextIndex
}
linkSegments.push(
<Link key={linkSegments.length} {...commitLinkProps}>
{remainingMessage}
</Link>
)
return <>{linkSegments}</>
}
return <Link {...commitLinkProps}>{message}</Link>
}
// Some places return an URL to objects within a repo, e.g.:
//
// https://github.com/sourcegraph/sourcegraph/commit/ad1ea519e5a31bb868be947107bcf43f4f9fc672
//
// This function removes those unwanted parts
const GITHUB_URL_SCHEMA = /^(https?:\/\/[^/]+\/[^/]+\/[^/]+)(.*)$/
function githubRepoUrl(url: string): string {
const match = url.match(GITHUB_URL_SCHEMA)
if (match?.[1]) {
return match[1]
}
return url
return <Linkified input={message} externalURLs={externalURLs} as={Link} {...commitLinkProps} />
}

View File

@ -13,6 +13,7 @@ import { eventLogger } from '../../tracking/eventLogger'
import { CommitMessageWithLinks } from '../commit/CommitMessageWithLinks'
import { DiffModeSelector } from '../commit/DiffModeSelector'
import { DiffMode } from '../commit/RepositoryCommitPage'
import { Linkified } from '../linkifiy/Linkified'
import { GitCommitNodeByline } from './GitCommitNodeByline'
@ -148,7 +149,9 @@ export const GitCommitNode: React.FunctionComponent<React.PropsWithChildren<GitC
const commitMessageBody =
expandCommitMessageBody || showCommitMessageBody ? (
<div className="w-100">
<pre className={styles.messageBody}>{node.body}</pre>
<pre className={styles.messageBody}>
{node.body && <Linkified input={node.body} externalURLs={node.externalURLs} />}
</pre>
</div>
) : undefined

View File

@ -8,6 +8,7 @@ import { Button, Link, Icon, Code } from '@sourcegraph/wildcard'
import { eventLogger } from '../../tracking/eventLogger'
import { CommitMessageWithLinks } from '../commit/CommitMessageWithLinks'
import { Linkified } from '../linkifiy/Linkified'
import { GitCommitNodeProps } from './GitCommitNode'
import { GitCommitNodeByline } from './GitCommitNodeByline'
@ -64,7 +65,9 @@ export const GitCommitNodeTableRow: React.FC<
expandCommitMessageBody || showCommitMessageBody ? (
<tr className={classNames(styles.tableRow, className)}>
<td colSpan={3}>
<pre className={styles.messageBody}>{node.body}</pre>
<pre className={styles.messageBody}>
{node.body && <Linkified input={node.body} externalURLs={node.externalURLs} />}
</pre>
</td>
</tr>
) : undefined

View File

@ -0,0 +1,57 @@
import React, { useMemo, forwardRef } from 'react'
import { ForwardReferenceComponent, Link } from '@sourcegraph/wildcard'
import { ExternalServiceKind } from '../../graphql-operations'
import { getLinksFromString } from './get-links'
interface LinkifiedProps {
    /** The text to scan for links. */
    input: string
    /**
     * External service URLs of the repository. They are handed to `getLinksFromString`, which uses a
     * GitHub entry (if any) to resolve `#1234`-style references.
     */
    externalURLs: { url: string; serviceKind: ExternalServiceKind | null }[] | undefined
}

/**
 * Takes a given input string and transforms any matching URLs into <a> tags.
 *
 * Text between links is wrapped in the `as` component (a fragment by default) and receives every
 * prop that is not consumed by `Linkified` itself. Detected links are always rendered with `Link`
 * and open in a new tab.
 *
 * The forwarded `ref` is accepted for type compatibility but is not attached to anything, because
 * the output is a fragment of several sibling elements.
 */
export const Linkified = forwardRef((props, ref) => {
    const { input, externalURLs, as: Component = React.Fragment, ...otherProps } = props

    // Memoize only the link detection. `otherProps` is a fresh object on every render, so listing
    // it as a dependency (as the element list previously did) invalidated the memo every time.
    const links = useMemo(() => getLinksFromString({ input, externalURLs }), [input, externalURLs])

    const elements: React.ReactNode[] = []
    let lastIndex = 0

    for (const { start, end, href, value } of links) {
        // Plain text between the previous link (or the start of the input) and this link.
        if (start > lastIndex) {
            elements.push(
                <Component key={`${lastIndex}-${start}`} {...otherProps}>
                    {input.slice(lastIndex, start)}
                </Component>
            )
        }

        // `_blank` is the keyword for "new browsing context"; a bare `blank` would name a single
        // shared window that every link reuses.
        elements.push(
            <Link key={`${start}-${end}`} to={href} target="_blank" rel="noreferrer noopener">
                {value}
            </Link>
        )

        lastIndex = end
    }

    // Trailing text after the last link (or the whole input when there are no links).
    if (lastIndex < input.length) {
        elements.push(
            <Component key={`${lastIndex}-${input.length}`} {...otherProps}>
                {input.slice(lastIndex)}
            </Component>
        )
    }

    return <>{elements}</>
}) as ForwardReferenceComponent<React.ExoticComponent, LinkifiedProps>
Linkified.displayName = 'Linkified'

View File

@ -0,0 +1,69 @@
import { ExternalServiceKind } from '../../graphql-operations'
import { getLinksFromString } from './get-links'
// GitHub external service fixture shared by the tests below. Passing it in `externalURLs` is what
// allows `#1234`-style references to be resolved against the repository URL.
const externalURL: { url: string; serviceKind: ExternalServiceKind | null } = {
url: 'https://github.com/sourcegraph/sourcegraph',
serviceKind: ExternalServiceKind.GITHUB,
}
// Unit tests for `getLinksFromString`: URL detection, GitHub reference detection, and how the two interact.
describe('get-links', () => {
// A plain URL and a `#1234` reference in the same input are both reported, ordered by position.
test('parses urls and GitHub issues', () => {
const example = 'This contains a url https://sourcegraph.com. This contains a GH issue #1234'
const result = getLinksFromString({ input: example, externalURLs: [externalURL] })
expect(result).toMatchInlineSnapshot(`
Array [
Object {
"end": 43,
"href": "https://sourcegraph.com",
"start": 20,
"type": "url",
"value": "https://sourcegraph.com",
},
Object {
"end": 75,
"href": "https://github.com/sourcegraph/sourcegraph/pull/1234",
"start": 70,
"type": "gh-issue",
"value": "#1234",
},
]
`)
})
// A `#1234` fragment that sits inside a URL must be reported once, as part of the URL only.
test('parses overlapping URLs and GitHub issues', () => {
const example = 'This contains a URL that could be mistaken for a GH issue https://sourcegraph.com/(#1234)'
const result = getLinksFromString({
input: example,
externalURLs: [externalURL],
})
expect(result).toMatchInlineSnapshot(`
Array [
Object {
"end": 89,
"href": "https://sourcegraph.com/(#1234)",
"start": 58,
"type": "url",
"value": "https://sourcegraph.com/(#1234)",
},
]
`)
})
// Without `externalURLs` there is no repository to link to, so `#1234` stays plain text.
test('does not parse GitHub issues if no external URLS', () => {
const example = 'This contains a GH issue #1234'
const result = getLinksFromString({
input: example,
})
expect(result).toHaveLength(0)
})
// Protocol-less candidates such as `rust.rs` look like domains but must not be linked.
test('does not parse file names', () => {
const example = 'This contains a file name that could be mistaken for a URL: example/test/rust.rs'
const result = getLinksFromString({
input: example,
externalURLs: [externalURL],
})
expect(result).toHaveLength(0)
})
})

View File

@ -0,0 +1,110 @@
import { find as linkifyFind } from 'linkifyjs'
import { ExternalServiceKind } from '../../graphql-operations'
// External service URLs sometimes point at an object inside a repository instead of at the
// repository itself, e.g.:
//
// https://github.com/sourcegraph/sourcegraph/commit/ad1ea519e5a31bb868be947107bcf43f4f9fc672
//
// The first capture group keeps `scheme://host/owner/repo`; the second one swallows the rest.
const GITHUB_URL_SCHEMA = /^(https?:\/\/[^/]+\/[^/]+\/[^/]+)(.*)$/

/**
 * Strips everything after the `owner/repo` part of an http(s) URL.
 *
 * @param url URL of a repository or of an object inside a repository.
 * @returns The repository root, or `url` unchanged when it does not have the expected shape.
 */
function githubRepoUrl(url: string): string {
    const repositoryRoot = GITHUB_URL_SCHEMA.exec(url)?.[1]
    return repositoryRoot || url
}
// This regex is supposed to match in the following cases:
//
// - Create search and search-ui packages (#29773)
// - Fix #123 for xyz
// - #123 at the very start of the input
//
// However it is supposed not to match in:
//
// - Something sourcegraph/other-repo#123 or so
// - 123#123
//
// Group 1 is the boundary in front of the reference: empty at the start of the input, otherwise a
// single non-alphanumeric character. Group 2 is the reference itself, e.g. `#123`.
const GH_ISSUE_NUMBER_IN_COMMIT = /(^|[^\dA-Za-z])(#\d+)/g

/**
 * Finds `#<number>` references in `input` and turns them into links below the repository URL.
 *
 * @param input Text to scan, e.g. a commit subject or body.
 * @param externalServiceUrl URL of the repository (or of an object inside it) on the code host;
 * it is reduced to the repository root with `githubRepoUrl` before links are built.
 * @returns One entry per reference, in order of appearance. `start`/`end` delimit the reference
 * itself (the `#` and its digits), excluding the boundary character in front of it.
 */
const getGitHubIssueLinks = (input: string, externalServiceUrl: string): LinkFromString[] => {
    const links: LinkFromString[] = []
    const matches = [...input.matchAll(GH_ISSUE_NUMBER_IN_COMMIT)]
    if (matches.length === 0) {
        return links
    }

    const url = githubRepoUrl(externalServiceUrl)
    for (const match of matches) {
        if (match.index === undefined) {
            continue
        }

        const [, boundary, issueNumber] = match
        // The match begins at the boundary, which is empty when the reference opens the input.
        const start = match.index + boundary.length

        links.push({
            start,
            end: start + issueNumber.length,
            // Drop the leading `#` to get the bare number for the URL path.
            href: `${url}/pull/${issueNumber.slice(1)}`,
            value: issueNumber,
            type: 'gh-issue',
        })
    }

    return links
}
/**
 * Finds URLs in `input` that start with an explicit protocol (`http`, `https`, `ftp` or `file`).
 *
 * Note: Matching URLs within a random string is difficult, as a URL can contain almost any character.
 * For example, it is valid to end a URL with parentheses or other punctuation, but in most cases this
 * will not be desired. We use linkifyjs to capture these edge cases and focus on the most common URLs.
 */
const getLinks = (input: string): LinkFromString[] => {
    // Candidates that don't begin with a protocol are dropped.
    // This ensures we don't accidentally parse file names as links.
    const startsWithProtocol = /^(https?|ftp|file):\/\//

    const urls: LinkFromString[] = []
    for (const { start, end, href, value } of linkifyFind(input)) {
        if (!startsWithProtocol.test(value)) {
            continue
        }
        urls.push({ start, end, href, value, type: 'url' })
    }
    return urls
}
interface GetLinksFromStringParams {
    /** Text to scan for links. */
    input: string
    /** External service URLs of the repository; a GitHub entry enables `#1234`-style references. */
    externalURLs?: { url: string; serviceKind: ExternalServiceKind | null }[]
}

interface LinkFromString {
    /** Index of the first character of the link within the input. */
    start: number
    /** Index just past the last character of the link within the input. */
    end: number
    /** Target the link points to. */
    href: string
    /** The link text exactly as it appears in the input. */
    value: string
    /** Whether the link is a plain URL or a GitHub issue/pull request reference. */
    type: 'url' | 'gh-issue'
}

/**
 * Given an input string, returns a sorted array of links found within the string.
 * If `externalURLs` contains a GitHub entry, GitHub issue references (e.g. #1234) will be parsed
 * and included as links.
 *
 * The returned links never overlap: when two candidates cover the same text, the one that starts
 * first wins and the other is dropped.
 */
export const getLinksFromString = ({ input, externalURLs }: GetLinksFromStringParams): LinkFromString[] => {
    const github = externalURLs?.find(url => url.serviceKind === ExternalServiceKind.GITHUB)
    const githubLinks = github ? getGitHubIssueLinks(input, github.url) : []
    const candidates = [...getLinks(input), ...githubLinks].sort((a, b) => a.start - b.start)

    // Drop links that overlap a link we already kept.
    // This avoids a scenario where a link is rendered twice, once as a URL and once as a GH issue.
    // The comparison must be against the last KEPT link: comparing against the previous candidate
    // lets a second reference inside the same URL (e.g. `?a=#1&b=#2`) slip through.
    const links: LinkFromString[] = []
    let coveredUntil = 0
    for (const link of candidates) {
        if (link.start < coveredUntil) {
            continue
        }
        links.push(link)
        coveredUntil = link.end
    }
    return links
}

View File

@ -438,6 +438,7 @@
"js-yaml": "^4.1.0",
"jsonc-parser": "^3.0.0",
"linguist-languages": "^7.14.0",
"linkifyjs": "^4.1.0",
"lodash": "^4.17.20",
"lru-cache": "^7.8.0",
"marked": "4.0.16",

View File

@ -296,6 +296,7 @@ importers:
libhoney: ^3.1.1
license-checker: ^25.0.1
linguist-languages: ^7.14.0
linkifyjs: ^4.1.0
lodash: ^4.17.20
lru-cache: ^7.8.0
marked: 4.0.16
@ -508,6 +509,7 @@ importers:
js-yaml: 4.1.0
jsonc-parser: 3.2.0
linguist-languages: 7.14.0
linkifyjs: 4.1.0
lodash: 4.17.21
lru-cache: 7.14.0
marked: 4.0.16
@ -19426,6 +19428,10 @@ packages:
dependencies:
uc.micro: 1.0.5
/linkifyjs/4.1.0:
resolution: {integrity: sha512-Ffv8VoY3+ixI1b3aZ3O+jM6x17cOsgwfB1Wq7pkytbo1WlyRp6ZO0YDMqiWT/gQPY/CmtiGuKfzDIVqxh1aCTA==}
dev: false
/listenercount/1.0.1:
resolution: {integrity: sha512-3mk/Zag0+IJxeDrxSgaDPy4zZ3w05PRZeJNnlWhzFz5OkX49J4krc+A8X2d2M69vGMBEX0uyl8M+W+8gH+kBqQ==}
dev: true