From 07a8296a451916b17966d02fc97f58ef63e40eda Mon Sep 17 00:00:00 2001 From: Tom Ross Date: Thu, 23 Mar 2023 09:05:40 +0000 Subject: [PATCH] Linkify commit messages and commit bodies (#49841) --- .../repo/commit/CommitMessageWithLinks.tsx | 75 +----------- client/web/src/repo/commits/GitCommitNode.tsx | 5 +- .../repo/commits/GitCommitNodeTableRow.tsx | 5 +- client/web/src/repo/linkifiy/Linkified.tsx | 57 +++++++++ .../web/src/repo/linkifiy/get-links.test.ts | 69 +++++++++++ client/web/src/repo/linkifiy/get-links.ts | 110 ++++++++++++++++++ package.json | 1 + pnpm-lock.yaml | 6 + 8 files changed, 253 insertions(+), 75 deletions(-) create mode 100644 client/web/src/repo/linkifiy/Linkified.tsx create mode 100644 client/web/src/repo/linkifiy/get-links.test.ts create mode 100644 client/web/src/repo/linkifiy/get-links.ts diff --git a/client/web/src/repo/commit/CommitMessageWithLinks.tsx b/client/web/src/repo/commit/CommitMessageWithLinks.tsx index f43028df6b7..465651dc2bc 100644 --- a/client/web/src/repo/commit/CommitMessageWithLinks.tsx +++ b/client/web/src/repo/commit/CommitMessageWithLinks.tsx @@ -1,17 +1,7 @@ import { Link } from '@sourcegraph/wildcard' import { ExternalServiceKind } from '../../graphql-operations' - -// This regex is supposed to match in the following cases: -// -// - Create search and search-ui packages (#29773) -// - Fix #123 for xyz -// -// However it is supposed not to mach in: -// -// - Something sourcegraph/other-repo#123 or so -// - 123#123 -const GH_ISSUE_NUMBER_IN_COMMIT = /([^\dA-Za-z](#\d+))/g +import { Linkified } from '../linkifiy/Linkified' interface Props { message: string @@ -34,66 +24,5 @@ export const CommitMessageWithLinks = ({ to, } - const github = externalURLs ? externalURLs.find(url => url.serviceKind === ExternalServiceKind.GITHUB) : null - const matches = [...message.matchAll(GH_ISSUE_NUMBER_IN_COMMIT)] - if (github && matches.length > 0) { - const url = githubRepoUrl(github.url) - let remainingMessage = message - let skippedCharacters = 0 - const linkSegments: React.ReactNode[] = [] - - for (const match of matches) { - if (match.index === undefined) { - continue - } - const issueNumber = match[2] - const index = remainingMessage.indexOf(issueNumber, match.index - skippedCharacters) - const before = remainingMessage.slice(0, index) - - linkSegments.push( - - {before} - - ) - linkSegments.push( - - {issueNumber} - - ) - - const nextIndex = index + issueNumber.length - remainingMessage = remainingMessage.slice(index + issueNumber.length) - skippedCharacters += nextIndex - } - - linkSegments.push( - - {remainingMessage} - - ) - - return <>{linkSegments} - } - - return {message} -} - -// Some places return an URL to objects within a repo, e.g.: -// -// https://github.com/sourcegraph/sourcegraph/commit/ad1ea519e5a31bb868be947107bcf43f4f9fc672 -// -// This function removes those unwanted parts -const GITHUB_URL_SCHEMA = /^(https?:\/\/[^/]+\/[^/]+\/[^/]+)(.*)$/ -function githubRepoUrl(url: string): string { - const match = url.match(GITHUB_URL_SCHEMA) - if (match?.[1]) { - return match[1] - } - - return url + return } diff --git a/client/web/src/repo/commits/GitCommitNode.tsx b/client/web/src/repo/commits/GitCommitNode.tsx index 03a9d62f62f..87b2d5c7cff 100644 --- a/client/web/src/repo/commits/GitCommitNode.tsx +++ b/client/web/src/repo/commits/GitCommitNode.tsx @@ -13,6 +13,7 @@ import { eventLogger } from '../../tracking/eventLogger' import { CommitMessageWithLinks } from '../commit/CommitMessageWithLinks' import { DiffModeSelector } from '../commit/DiffModeSelector' import { DiffMode } from '../commit/RepositoryCommitPage' +import { Linkified } from '../linkifiy/Linkified' import { GitCommitNodeByline } from './GitCommitNodeByline' @@ -148,7 +149,9 @@ export const GitCommitNode: React.FunctionComponent -
{node.body}
+
+                    {node.body && }
+                
) : undefined diff --git a/client/web/src/repo/commits/GitCommitNodeTableRow.tsx b/client/web/src/repo/commits/GitCommitNodeTableRow.tsx index 705100fbfae..2f05a0901a3 100644 --- a/client/web/src/repo/commits/GitCommitNodeTableRow.tsx +++ b/client/web/src/repo/commits/GitCommitNodeTableRow.tsx @@ -8,6 +8,7 @@ import { Button, Link, Icon, Code } from '@sourcegraph/wildcard' import { eventLogger } from '../../tracking/eventLogger' import { CommitMessageWithLinks } from '../commit/CommitMessageWithLinks' +import { Linkified } from '../linkifiy/Linkified' import { GitCommitNodeProps } from './GitCommitNode' import { GitCommitNodeByline } from './GitCommitNodeByline' @@ -64,7 +65,9 @@ export const GitCommitNodeTableRow: React.FC< expandCommitMessageBody || showCommitMessageBody ? ( -
{node.body}
+
+                        {node.body && }
+                    
) : undefined diff --git a/client/web/src/repo/linkifiy/Linkified.tsx b/client/web/src/repo/linkifiy/Linkified.tsx new file mode 100644 index 00000000000..96ddd290e34 --- /dev/null +++ b/client/web/src/repo/linkifiy/Linkified.tsx @@ -0,0 +1,57 @@ +import React, { useMemo, forwardRef } from 'react' + +import { ForwardReferenceComponent, Link } from '@sourcegraph/wildcard' + +import { ExternalServiceKind } from '../../graphql-operations' + +import { getLinksFromString } from './get-links' + +interface LinkifiedProps { + input: string + externalURLs: { url: string; serviceKind: ExternalServiceKind | null }[] | undefined +} + +/** + * Takes a given input string and transforms any matching URLs into tags. + */ +export const Linkified = forwardRef((props, ref) => { + const { input, externalURLs, as: Component = React.Fragment, ...otherProps } = props + + const elements = useMemo(() => { + const result: React.ReactNode[] = [] + + const links = getLinksFromString({ input, externalURLs }) + let lastIndex = 0 + + for (const link of links) { + const { start, end, href, value } = link + if (start > lastIndex) { + result.push( + + {input.slice(lastIndex, start)} + + ) + } + result.push( + + {value} + + ) + lastIndex = end + } + + if (lastIndex < input.length) { + result.push( + + {input.slice(lastIndex)} + + ) + } + + return result + }, [Component, externalURLs, input, otherProps]) + + return <>{elements} +}) as ForwardReferenceComponent + +Linkified.displayName = 'Linkified' diff --git a/client/web/src/repo/linkifiy/get-links.test.ts b/client/web/src/repo/linkifiy/get-links.test.ts new file mode 100644 index 00000000000..bfd3dc6f006 --- /dev/null +++ b/client/web/src/repo/linkifiy/get-links.test.ts @@ -0,0 +1,69 @@ +import { ExternalServiceKind } from '../../graphql-operations' + +import { getLinksFromString } from './get-links' + +const externalURL: { url: string; serviceKind: ExternalServiceKind | null } = { + url: 'https://github.com/sourcegraph/sourcegraph', + serviceKind: ExternalServiceKind.GITHUB, +} + +describe('get-links', () => { + test('parses urls and GitHub issues', () => { + const example = 'This contains a url https://sourcegraph.com. This contains a GH issue #1234' + const result = getLinksFromString({ input: example, externalURLs: [externalURL] }) + expect(result).toMatchInlineSnapshot(` + Array [ + Object { + "end": 43, + "href": "https://sourcegraph.com", + "start": 20, + "type": "url", + "value": "https://sourcegraph.com", + }, + Object { + "end": 75, + "href": "https://github.com/sourcegraph/sourcegraph/pull/1234", + "start": 70, + "type": "gh-issue", + "value": "#1234", + }, + ] + `) + }) + + test('parses overlapping URLs and GitHub issues', () => { + const example = 'This contains a URL that could be mistaken for a GH issue https://sourcegraph.com/(#1234)' + const result = getLinksFromString({ + input: example, + externalURLs: [externalURL], + }) + expect(result).toMatchInlineSnapshot(` + Array [ + Object { + "end": 89, + "href": "https://sourcegraph.com/(#1234)", + "start": 58, + "type": "url", + "value": "https://sourcegraph.com/(#1234)", + }, + ] + `) + }) + + test('does not parse GitHub issues if no external URLS', () => { + const example = 'This contains a GH issue #1234' + const result = getLinksFromString({ + input: example, + }) + expect(result).toHaveLength(0) + }) + + test('does not parse file names', () => { + const example = 'This contains a file name that could be mistaken for a URL: example/test/rust.rs' + const result = getLinksFromString({ + input: example, + externalURLs: [externalURL], + }) + expect(result).toHaveLength(0) + }) +}) diff --git a/client/web/src/repo/linkifiy/get-links.ts b/client/web/src/repo/linkifiy/get-links.ts new file mode 100644 index 00000000000..a7649eb80f9 --- /dev/null +++ b/client/web/src/repo/linkifiy/get-links.ts @@ -0,0 +1,110 @@ +import { find as linkifyFind } from 'linkifyjs' + +import { ExternalServiceKind } from '../../graphql-operations' + +// Some places return an URL to objects within a repo, e.g.: +// +// https://github.com/sourcegraph/sourcegraph/commit/ad1ea519e5a31bb868be947107bcf43f4f9fc672 +// +// This function removes those unwanted parts +const GITHUB_URL_SCHEMA = /^(https?:\/\/[^/]+\/[^/]+\/[^/]+)(.*)$/ +function githubRepoUrl(url: string): string { + const match = url.match(GITHUB_URL_SCHEMA) + if (match?.[1]) { + return match[1] + } + + return url +} + +// This regex is supposed to match in the following cases: +// +// - Create search and search-ui packages (#29773) +// - Fix #123 for xyz +// +// However it is supposed not to match in: +// +// - Something sourcegraph/other-repo#123 or so +// - 123#123 +const GH_ISSUE_NUMBER_IN_COMMIT = /([^\dA-Za-z](#\d+))/g + +const getGitHubIssueLinks = (input: string, externalServiceUrl: string): LinkFromString[] => { + const links = [] + + const matches = [...input.matchAll(GH_ISSUE_NUMBER_IN_COMMIT)] + if (matches.length > 0) { + const url = githubRepoUrl(externalServiceUrl) + for (const match of matches) { + if (match.index === undefined) { + continue + } + const issueNumber = match[2] + links.push({ + start: match.index + 1, + end: match.index + match[0].length, + href: `${url}/pull/${issueNumber.replace('#', '')}`, + value: issueNumber, + type: 'gh-issue' as const, + }) + } + } + + return links +} + +/** + * Note: Matching URLs within a random string is difficult, as a URL can contain almost any character. + * For example, it is valid to end a URL with parentheses or other punctuation, but in most cases this will not be desired. + * We use linkifyjs to capture these edge cases and focus on the most common URLs. +] */ +const getLinks = (input: string): LinkFromString[] => { + const links = linkifyFind(input) + return links + .filter(({ value }) => + // Filter out links that don't begin with a protocol. + // This ensures we don't accidentally parse file names as links. + /^(https?|ftp|file):\/\//.test(value) + ) + .map(link => ({ + start: link.start, + end: link.end, + href: link.href, + value: link.value, + type: 'url', + })) +} + +interface GetLinksFromStringParams { + input: string + externalURLs?: { url: string; serviceKind: ExternalServiceKind | null }[] +} + +interface LinkFromString { + start: number + end: number + href: string + value: string + type: 'url' | 'gh-issue' +} + +/** + * Given an input string, returns a sorted array of links found within the string. + * If `externalURLs` is provided, GitHub issue references (e.g. #1234) will be parsed and included as links. + */ +export const getLinksFromString = ({ input, externalURLs }: GetLinksFromStringParams): LinkFromString[] => { + const github = externalURLs ? externalURLs.find(url => url.serviceKind === ExternalServiceKind.GITHUB) : null + const githubLinks = github ? getGitHubIssueLinks(input, github.url) : [] + + const links = [...getLinks(input), ...githubLinks] + .sort((a, b) => a.start - b.start) + .filter((link, index, links) => { + // Filter out links that are contained within another link. + // This avoids a scenario where a link is rendered twice, once as a URL and once as a GH issue. + if (index === 0) { + return true + } + return link.start >= links[index - 1].end + }) + + return links +} diff --git a/package.json b/package.json index 88f427d8689..a6b93671457 100644 --- a/package.json +++ b/package.json @@ -438,6 +438,7 @@ "js-yaml": "^4.1.0", "jsonc-parser": "^3.0.0", "linguist-languages": "^7.14.0", + "linkifyjs": "^4.1.0", "lodash": "^4.17.20", "lru-cache": "^7.8.0", "marked": "4.0.16", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 667baae5e05..efcc41c28e0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -296,6 +296,7 @@ importers: libhoney: ^3.1.1 license-checker: ^25.0.1 linguist-languages: ^7.14.0 + linkifyjs: ^4.1.0 lodash: ^4.17.20 lru-cache: ^7.8.0 marked: 4.0.16 @@ -508,6 +509,7 @@ importers: js-yaml: 4.1.0 jsonc-parser: 3.2.0 linguist-languages: 7.14.0 + linkifyjs: 4.1.0 lodash: 4.17.21 lru-cache: 7.14.0 marked: 4.0.16 @@ -19426,6 +19428,10 @@ packages: dependencies: uc.micro: 1.0.5 + /linkifyjs/4.1.0: + resolution: {integrity: sha512-Ffv8VoY3+ixI1b3aZ3O+jM6x17cOsgwfB1Wq7pkytbo1WlyRp6ZO0YDMqiWT/gQPY/CmtiGuKfzDIVqxh1aCTA==} + dev: false + /listenercount/1.0.1: resolution: {integrity: sha512-3mk/Zag0+IJxeDrxSgaDPy4zZ3w05PRZeJNnlWhzFz5OkX49J4krc+A8X2d2M69vGMBEX0uyl8M+W+8gH+kBqQ==} dev: true