Render Jupyter notebooks (#62583)

Adds basic Jupyter notebook rendering to the file view. It does not include math rendering, only basic block rendering.
This commit is contained in:
Camden Cheek 2024-05-10 12:21:10 -04:00 committed by GitHub
parent 7f9549efbf
commit a4a1111c56
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 1060 additions and 81 deletions

View File

@ -1,5 +1,6 @@
<script lang="ts">
import { renderMarkdown } from '$lib/common'
import styles from './Markdown.module.scss'
export let content: string

View File

@ -233,7 +233,8 @@
<a href="{repoURL}/-/raw/{filePath}" target="_blank" download>Download file</a>
</Alert>
{:else if blob && showFormattedView}
<div class={`rich ${markdownStyles.markdown}`}>
<!-- jupyter is a global style -->
<div class={`rich jupyter ${markdownStyles.markdown}`}>
{@html blob.richHTML}
</div>
{:else if blob}

View File

@ -1,4 +1,5 @@
@import 'wildcard/src/global-styles/colors';
@import 'wildcard/src/global-styles/jupyter';
@import 'wildcard/src/global-styles/variables';
@import 'wildcard/src/global-styles/reboot';

View File

@ -24,7 +24,8 @@ export const RenderedFile = forwardRef<HTMLDivElement, Props>(function RenderedF
useScrollToLocationHash(location)
return (
<div ref={reference} className={classNames(styles.renderedFile, className)}>
// the jupyter class comes from global style sheets
<div ref={reference} className={classNames(styles.renderedFile, className, 'jupyter')}>
<div className={styles.container}>
<Markdown dangerousInnerHTML={dangerousInnerHTML} />
</div>

View File

@ -22,6 +22,7 @@
@import './progress';
@import './collapse';
@import './grid';
@import './jupyter';
// Show a focus ring when performing keyboard navigation. Uses the polyfill at
// https://github.com/WICG/focus-visible because few browsers support :focus-visible.

View File

@ -0,0 +1,615 @@
/* stylelint-disable selector-class-pattern */
// Rendered jupyter notebook styling
.jupyter {
--jp-cell-prompt-width: 6rem;
/* Cell */
.jp-Cell {
padding: 0.25rem;
margin: 0;
border: none;
outline: none;
background: transparent;
pre {
border: 1px solid var(--color-bg-3);
padding: 0.5rem;
margin: 0;
}
code {
display: inline-block;
padding: 0;
background: transparent;
}
}
// Put a space between input and output when there IS output
.jp-Cell:not(.jp-mod-noOutputs) .jp-Cell-outputWrapper {
margin-top: 0.25rem;
}
/* Collapser */
.jp-Collapser {
flex: 0 0 0.5rem;
padding: 0;
margin: 0;
border: none;
outline: none;
background: transparent;
border-radius: 2px;
opacity: 1;
}
.jp-Collapser-child {
display: block;
width: 100%;
box-sizing: border-box;
// height: 100% doesn't work because the height of its parent is computed from content
position: absolute;
top: 0;
bottom: 0;
}
// Make the output collapser disappear when there is no output, but do so
// in a manner that leaves it in the layout and preserves its width.
.jp-Cell.jp-mod-noOutputs .jp-Cell-outputCollapser {
border: none !important;
background: transparent !important;
}
.jp-Cell:not(.jp-mod-noOutputs) .jp-Cell-outputCollapser {
min-height: 20px;
}
/* Common input/output */
.jp-Cell-inputWrapper,
.jp-Cell-outputWrapper {
display: flex;
flex-direction: row;
padding: 0;
margin: 0;
// Added to reveal the box-shadow on the input and output collapsers.
overflow: visible;
}
// Only input/output areas inside cells
.jp-Cell-inputArea,
.jp-Cell-outputArea {
flex: 1 1 auto;
}
/* Input */
// All input areas
.jp-InputArea {
display: table;
table-layout: fixed;
width: 100%;
overflow: hidden;
}
.jp-InputArea-editor {
display: table-cell;
overflow: hidden;
vertical-align: top;
}
.jp-InputPrompt,
.jp-OutputPrompt {
display: table-cell;
vertical-align: top;
color: var(--text-disabled);
font-family: var(--code-font-family);
font-size: var(--font-size-small);
padding: 0.25rem;
&:not(:empty) {
width: var(--jp-cell-prompt-width);
}
// Right align prompt text, don't wrap to handle large prompt numbers
text-align: right;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
// Disable text selection
user-select: none;
}
/* Markdown Cell */
// Rendered markdown cell body: laid out as a full-width table cell with no
// vertical margins.
.jp-MarkdownOutput {
display: table-cell;
width: 100%;
margin-top: 0;
margin-bottom: 0;
// NOTE(review): --jp-code-padding is not declared in the visible part of this
// stylesheet (only --jp-cell-prompt-width is). If it is not defined elsewhere,
// this declaration has no effect — confirm, or give it a value / fallback.
padding-left: var(--jp-code-padding);
}
.jp-MarkdownOutput.jp-RenderedHTMLCommon {
overflow: auto;
}
.jp-RenderedMarkdown.jp-RenderedHTMLCommon td,
.jp-RenderedMarkdown.jp-RenderedHTMLCommon th {
max-width: none;
}
/* Code Cell */
.jp-CodeMirrorEditor {
cursor: text;
}
.cm-editor.cm-s-jupyter {
font-family: var(--code-font-family);
border: 0;
border-radius: 0;
height: auto;
pre {
padding: 0;
code {
min-width: 100%;
}
}
.highlight pre {
padding: 0.25rem 0.25rem;
margin: 0;
font-family: inherit;
font-size: inherit;
line-height: inherit;
color: inherit;
}
}
/* Main OutputArea */
.jp-OutputArea {
overflow-y: auto;
}
.jp-OutputArea-child {
display: table;
table-layout: fixed;
width: 100%;
overflow: hidden;
}
.jp-OutputArea-prompt {
display: table-cell;
vertical-align: top;
}
.jp-OutputArea-output {
display: table-cell;
height: auto;
overflow: auto;
user-select: text;
}
.jp-OutputArea-output pre {
border: none;
margin: 0;
overflow-x: auto;
overflow-y: auto;
word-break: break-all;
word-wrap: break-word;
white-space: pre-wrap;
}
.jp-OutputArea-output.jp-RenderedHTMLCommon table {
margin-left: 0;
margin-right: 0;
}
// Hide the gutter in case of
// - nested output areas (e.g. in the case of output widgets)
// - mirrored output areas
.jp-OutputArea .jp-OutputArea .jp-OutputArea-prompt {
display: none;
}
// executeResult is added to any Output-result for the display of the object returned by a cell.
.jp-OutputArea-output.jp-OutputArea-executeResult {
margin-left: 0;
width: 100%;
}
// Execute-result text output whose direct child is a highlighted
// `pre.chroma` block drops the left padding that `.jp-RenderedText`
// applies to plain text output.
.jp-OutputArea-executeResult .jp-RenderedText.jp-OutputArea-output {
&:has(> pre.chroma) {
padding-left: 0;
}
}
/* RenderedText */
.jp-RenderedText {
text-align: left;
padding-left: 0.25rem;
font-family: var(--code-font-family);
}
/* Mobile */
// Narrow viewports (<= 760px): prompts, editors and outputs switch from
// table cells to table rows so they stack vertically, and prompt text is
// left-aligned instead of right-aligned.
@media only screen and (max-width: 760px) {
.jp-InputArea-editor {
display: table-row;
// NOTE(review): --jp-notebook-padding is not declared in the visible part of
// this stylesheet — confirm it is defined elsewhere or that this is a no-op.
margin-left: var(--jp-notebook-padding);
}
.jp-InputPrompt {
display: table-row;
text-align: left;
}
.jp-OutputPrompt {
display: table-row;
text-align: left;
}
.jp-OutputArea-child .jp-OutputArea-output {
display: table-row;
// NOTE(review): same undeclared --jp-notebook-padding as above — confirm.
margin-left: var(--jp-notebook-padding);
}
}
// Copied from markdown CSS
// Steps to generate colors / variables:
// 1. Run `chroma --html --style="github[-dark]" --html-all-styles`
// 2. Use multicursor and kebab-case to match formatting below
// 3. Generate preliminary color variables: `[css code here]`.split("\n").filter(_ => _.includes("color")).map(_ => `--${_.slice(_.indexOf("/* ")+3, _.indexOf(" */"))}-fg: ${_.slice(_.indexOf("color: ")+7, _.indexOf(" ", _.indexOf("color: ") + 7))}`).join("\n")
// 4. Paste them in `colors.scss` and fix any background-foreground issues
// 5. Prepend "chroma-" prefix to the child classes
.chroma {
/* stylelint-disable */
/* chroma-error */
.chroma-err {
color: var(--chroma-error-fg);
background-color: var(--chroma-error-bg);
}
/* chroma-line-link */
.chroma-lnlinks {
outline: none;
text-decoration: none;
color: var(--chroma-line-link-fg);
}
/* chroma-line-table-td */
.chroma-lntd {
vertical-align: top;
padding: 0;
margin: 0;
border: 0;
}
/* chroma-line-table */
.chroma-lntable {
border-spacing: 0;
padding: 0;
margin: 0;
border: 0;
}
/* chroma-line-highlight */
.chroma-hl {
background-color: var(--chroma-line-highlight-bg);
}
/* chroma-line-numbers-table */
.chroma-lnt {
white-space: pre;
user-select: none;
margin-right: 0.4rem;
padding: 0 0.4rem 0 0.4rem;
color: var(--chroma-line-numbers-table-fg);
}
/* chroma-line-numbers */
.chroma-ln {
white-space: pre;
user-select: none;
margin-right: 0.4rem;
padding: 0 0.4rem 0 0.4rem;
color: var(--chroma-line-numbers-fg);
}
/* chroma-line */
.chroma-line {
display: flex;
}
/* chroma-keyword */
.chroma-k {
color: var(--chroma-keyword-fg);
font-weight: bold;
}
/* chroma-keyword-constant */
.chroma-kc {
color: var(--chroma-keyword-constant-fg);
font-weight: bold;
}
/* chroma-keyword-declaration */
.chroma-kd {
color: var(--chroma-keyword-declaration-fg);
font-weight: bold;
}
/* chroma-keyword-namespace */
.chroma-kn {
color: var(--chroma-keyword-namespace-fg);
font-weight: bold;
}
/* chroma-keyword-pseudo */
.chroma-kp {
color: var(--chroma-keyword-pseudo-fg);
font-weight: bold;
}
/* chroma-keyword-reserved */
.chroma-kr {
color: var(--chroma-keyword-reserved-fg);
font-weight: bold;
}
/* chroma-keyword-type */
.chroma-kt {
color: var(--chroma-keyword-type-fg);
font-weight: bold;
}
/* chroma-name-attribute */
.chroma-na {
color: var(--chroma-name-attribute-fg);
}
/* chroma-name-builtin */
.chroma-nb {
color: var(--chroma-name-builtin-fg);
}
/* chroma-name-builtin-pseudo */
.chroma-bp {
color: var(--chroma-name-builtin-pseudo-fg);
}
/* chroma-name-class */
.chroma-nc {
color: var(--chroma-name-class-fg);
font-weight: bold;
}
/* chroma-name-constant */
.chroma-no {
color: var(--chroma-name-constant-fg);
}
/* chroma-name-decorator */
.chroma-nd {
color: var(--chroma-name-decorator-fg);
font-weight: bold;
}
/* chroma-name-entity */
.chroma-ni {
color: var(--chroma-name-entity-fg);
}
/* chroma-name-exception */
.chroma-ne {
color: var(--chroma-name-exception-fg);
font-weight: bold;
}
/* chroma-name-function */
.chroma-nf {
color: var(--chroma-name-function-fg);
font-weight: bold;
}
/* chroma-name-label */
.chroma-nl {
color: var(--chroma-name-label-fg);
font-weight: bold;
}
/* chroma-name-namespace */
.chroma-nn {
color: var(--chroma-name-namespace-fg);
}
/* chroma-name-tag */
.chroma-nt {
color: var(--chroma-name-tag-fg);
}
/* chroma-name-variable */
.chroma-nv {
color: var(--chroma-name-variable-fg);
}
/* chroma-name-variable-class */
.chroma-vc {
color: var(--chroma-name-variable-class-fg);
}
/* chroma-name-variable-global */
.chroma-vg {
color: var(--chroma-name-variable-global-fg);
}
/* chroma-name-variable-instance */
.chroma-vi {
color: var(--chroma-name-variable-instance-fg);
}
/* chroma-literal-string */
.chroma-s {
color: var(--chroma-literal-string-fg);
}
/* chroma-literal-string-affix */
.chroma-sa {
color: var(--chroma-literal-string-affix-fg);
}
/* chroma-literal-string-backtick */
.chroma-sb {
color: var(--chroma-literal-string-backtick-fg);
}
/* chroma-literal-string-char */
.chroma-sc {
color: var(--chroma-literal-string-char-fg);
}
/* chroma-literal-string-delimiter */
.chroma-dl {
color: var(--chroma-literal-string-delimiter-fg);
}
/* chroma-literal-string-doc */
.chroma-sd {
color: var(--chroma-literal-string-doc-fg);
}
/* chroma-literal-string-double */
.chroma-s2 {
color: var(--chroma-literal-string-double-fg);
}
/* chroma-literal-string-escape */
.chroma-se {
color: var(--chroma-literal-string-escape-fg);
}
/* chroma-literal-string-heredoc */
.chroma-sh {
color: var(--chroma-literal-string-heredoc-fg);
}
/* chroma-literal-string-interpol */
.chroma-si {
color: var(--chroma-literal-string-interpol-fg);
}
/* chroma-literal-string-other */
.chroma-sx {
color: var(--chroma-literal-string-other-fg);
}
/* chroma-literal-string-regex */
.chroma-sr {
color: var(--chroma-literal-string-regex-fg);
}
/* chroma-literal-string-single */
.chroma-s1 {
color: var(--chroma-literal-string-single-fg);
}
/* chroma-literal-string-symbol */
.chroma-ss {
color: var(--chroma-literal-string-symbol-fg);
}
/* chroma-literal-number */
.chroma-m {
color: var(--chroma-literal-number-fg);
}
/* chroma-literal-number-bin */
.chroma-mb {
color: var(--chroma-literal-number-bin-fg);
}
/* chroma-literal-number-float */
.chroma-mf {
color: var(--chroma-literal-number-float-fg);
}
/* chroma-literal-number-hex */
.chroma-mh {
color: var(--chroma-literal-number-hex-fg);
}
/* chroma-literal-number-integer */
.chroma-mi {
color: var(--chroma-literal-number-integer-fg);
}
/* chroma-literal-number-integer-long */
.chroma-il {
color: var(--chroma-literal-number-integer-long-fg);
}
/* chroma-literal-number-oct */
.chroma-mo {
color: var(--chroma-literal-number-oct-fg);
}
/* chroma-operator */
.chroma-o {
color: var(--chroma-operator-fg);
font-weight: bold;
}
/* chroma-operator-word */
.chroma-ow {
color: var(--chroma-operator-word-fg);
font-weight: bold;
}
/* chroma-comment */
.chroma-c {
color: var(--chroma-comment-fg);
font-style: italic;
}
/* chroma-comment-hashbang */
.chroma-ch {
color: var(--chroma-comment-hashbang-fg);
font-style: italic;
}
/* chroma-comment-multiline */
.chroma-cm {
color: var(--chroma-comment-multiline-fg);
font-style: italic;
}
/* chroma-comment-single */
.chroma-c1 {
color: var(--chroma-comment-single-fg);
font-style: italic;
}
/* chroma-comment-special */
.chroma-cs {
color: var(--chroma-comment-special-fg);
font-weight: bold;
font-style: italic;
}
/* chroma-comment-preproc */
.chroma-cp {
color: var(--chroma-comment-preproc-fg);
font-weight: bold;
font-style: italic;
}
/* chroma-comment-preproc-file */
.chroma-cpf {
color: var(--chroma-comment-preproc-file-fg);
font-weight: bold;
font-style: italic;
}
/* chroma-generic-deleted */
.chroma-gd {
color: var(--chroma-generic-deleted-fg);
background-color: var(--chroma-generic-deleted-bg);
}
/* chroma-generic-emph */
.chroma-ge {
color: var(--chroma-generic-emph-fg);
font-style: italic;
}
/* chroma-generic-error */
.chroma-gr {
color: var(--chroma-generic-error-fg);
}
/* chroma-generic-heading */
.chroma-gh {
color: var(--chroma-generic-heading-fg);
}
/* chroma-generic-inserted */
.chroma-gi {
color: var(--chroma-generic-inserted-fg);
background-color: var(--chroma-generic-inserted-bg);
}
/* chroma-generic-output */
.chroma-go {
color: var(--chroma-generic-output-fg);
}
/* chroma-generic-prompt */
.chroma-gp {
color: var(--chroma-generic-prompt-fg);
}
/* chroma-generic-strong */
.chroma-gs {
font-weight: bold;
}
/* chroma-generic-subheading */
.chroma-gu {
color: var(--chroma-generic-subheading-fg);
}
/* chroma-generic-traceback */
.chroma-gt {
color: var(--chroma-generic-traceback-fg);
}
/* chroma-generic-underline */
.chroma-gl {
text-decoration: underline;
}
/* chroma-text-whitespace */
.chroma-w {
color: var(--chroma-text-whitespace-fg);
}
}
}

View File

@ -295,6 +295,7 @@ go_library(
"//internal/honey/search",
"//internal/httpcli",
"//internal/insights",
"//internal/ipynb",
"//internal/jsonc",
"//internal/lazyregexp",
"//internal/licensing",

View File

@ -6,6 +6,7 @@ import (
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/externallink"
"github.com/sourcegraph/sourcegraph/internal/highlight"
"github.com/sourcegraph/sourcegraph/internal/ipynb"
"github.com/sourcegraph/sourcegraph/internal/markdown"
)
@ -33,11 +34,11 @@ type FileResolver interface {
func richHTML(content, ext string) (string, error) {
switch strings.ToLower(ext) {
case ".md", ".mdown", ".markdown", ".markdn":
break
default:
return "", nil
return markdown.Render(content)
case ".ipynb":
return ipynb.Render(content)
}
return markdown.Render(content)
return "", nil
}
type markdownOptions struct {

View File

@ -111,8 +111,8 @@ def go_dependencies():
name = "com_github_alecthomas_chroma_v2",
build_file_proto_mode = "disable_global",
importpath = "github.com/alecthomas/chroma/v2",
sum = "h1:CQCdj1BiBV17sD4Bd32b/Bzuiq/EqoNTrnIhyQAZ+Rk=",
version = "v2.5.0",
sum = "h1:Wh8qLEgMMsN7mgyG8/qIpegky2Hvzr4By6gEF7cmWgw=",
version = "v2.12.0",
)
go_repository(
name = "com_github_alecthomas_kingpin",
@ -702,6 +702,27 @@ def go_dependencies():
sum = "h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=",
version = "v1.0.1",
)
go_repository(
name = "com_github_bevzzz_nb",
build_file_proto_mode = "disable_global",
importpath = "github.com/bevzzz/nb",
sum = "h1:Pg2p4TXttIRquLZjz5cfffPfWeUCHVBPUhdcDhwbccI=",
version = "v0.3.0",
)
go_repository(
name = "com_github_bevzzz_nb_extension_extra_goldmark_jupyter",
build_file_proto_mode = "disable_global",
importpath = "github.com/bevzzz/nb/extension/extra/goldmark-jupyter",
sum = "h1:F/vUmFPZ6+URUo2WXyraB0JUkcpcHBFws9QYcv673l0=",
version = "v0.0.0-20240131001330-e69229bd9da4",
)
go_repository(
name = "com_github_bevzzz_nb_synth",
build_file_proto_mode = "disable_global",
importpath = "github.com/bevzzz/nb-synth",
sum = "h1:CH1+0p2ywCtqQbDL2KpwRn+XL71Peyhlshusdbn13kk=",
version = "v0.0.0-20240128164931-35fdda0583a0",
)
go_repository(
name = "com_github_bgentry_speakeasy",
build_file_proto_mode = "disable_global",
@ -5127,6 +5148,13 @@ def go_dependencies():
sum = "h1:aQmu9zQxDU0uhwR8SXOH/OrqEf+X8A0LQmwW3JX8Lcg=",
version = "v1.3.0",
)
go_repository(
name = "com_github_robert_nix_ansihtml",
build_file_proto_mode = "disable_global",
importpath = "github.com/robert-nix/ansihtml",
sum = "h1:VTiyQ6/+AxSJoSSLsMecnkh8i0ZqOEdiRl/odOc64fc=",
version = "v1.0.1",
)
go_repository(
name = "com_github_rogpeppe_fastuuid",
build_file_proto_mode = "disable_global",

6
go.mod
View File

@ -255,6 +255,9 @@ require (
github.com/aws/jsii-runtime-go v1.84.0
github.com/bazelbuild/bazel-gazelle v0.35.0
github.com/bazelbuild/rules_go v0.47.0
github.com/bevzzz/nb v0.3.0
github.com/bevzzz/nb-synth v0.0.0-20240128164931-35fdda0583a0
github.com/bevzzz/nb/extension/extra/goldmark-jupyter v0.0.0-20240131001330-e69229bd9da4
github.com/derision-test/go-mockgen/v2 v2.0.1
github.com/dghubble/gologin/v2 v2.4.0
github.com/edsrzf/mmap-go v1.1.0
@ -281,6 +284,7 @@ require (
github.com/pkoukk/tiktoken-go v0.1.6
github.com/pkoukk/tiktoken-go-loader v0.0.1
github.com/prometheus/statsd_exporter v0.22.7
github.com/robert-nix/ansihtml v1.0.1
github.com/sourcegraph/cloud-api v0.0.0-20240501113836-ecd1d4cba9dd
github.com/sourcegraph/log/logr v0.0.0-20240425170707-431bcb6c8668
github.com/sourcegraph/managed-services-platform-cdktf/gen/cloudflare v0.0.0-20230822024612-edb48c530722
@ -446,7 +450,7 @@ require (
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/alecthomas/chroma v0.10.0 // indirect
github.com/alecthomas/chroma/v2 v2.5.0
github.com/alecthomas/chroma/v2 v2.12.0
github.com/alecthomas/kingpin v2.2.6+incompatible // indirect
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137

12
go.sum
View File

@ -178,8 +178,8 @@ github.com/alecthomas/assert/v2 v2.2.1/go.mod h1:pXcQ2Asjp247dahGEmsZ6ru0UVwnkhk
github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
github.com/alecthomas/chroma/v2 v2.2.0/go.mod h1:vf4zrexSH54oEjJ7EdB65tGNHmH3pGZmVkgTP5RHvAs=
github.com/alecthomas/chroma/v2 v2.5.0 h1:CQCdj1BiBV17sD4Bd32b/Bzuiq/EqoNTrnIhyQAZ+Rk=
github.com/alecthomas/chroma/v2 v2.5.0/go.mod h1:yrkMI9807G1ROx13fhe1v6PN2DDeaR73L3d+1nmYQtw=
github.com/alecthomas/chroma/v2 v2.12.0 h1:Wh8qLEgMMsN7mgyG8/qIpegky2Hvzr4By6gEF7cmWgw=
github.com/alecthomas/chroma/v2 v2.12.0/go.mod h1:4TQu7gdfuPjSh76j78ietmqh9LiurGF0EpseFXdKMBw=
github.com/alecthomas/kingpin v2.2.6+incompatible h1:5svnBTFgJjZvGKyYBtMB0+m5wvrbUHiqye8wRJMlnYI=
github.com/alecthomas/kingpin v2.2.6+incompatible/go.mod h1:59OFYbFVLKQKq+mqrL6Rw5bR0c3ACQaawgXx0QYndlE=
github.com/alecthomas/repr v0.0.0-20220113201626-b1b626ac65ae/go.mod h1:2kn6fqh/zIyPLmm3ugklbEi5hg5wS435eygvNfaDQL8=
@ -321,6 +321,12 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bevzzz/nb v0.3.0 h1:Pg2p4TXttIRquLZjz5cfffPfWeUCHVBPUhdcDhwbccI=
github.com/bevzzz/nb v0.3.0/go.mod h1:i8J311U4tUD6ZjBDE3HY8qPswTuUORiUfAFcWPqUTdA=
github.com/bevzzz/nb-synth v0.0.0-20240128164931-35fdda0583a0 h1:CH1+0p2ywCtqQbDL2KpwRn+XL71Peyhlshusdbn13kk=
github.com/bevzzz/nb-synth v0.0.0-20240128164931-35fdda0583a0/go.mod h1:e7rTPaz8bZ1RKH/jysZpz4Hlj8X/HIh9UIXnEeRhTBc=
github.com/bevzzz/nb/extension/extra/goldmark-jupyter v0.0.0-20240131001330-e69229bd9da4 h1:F/vUmFPZ6+URUo2WXyraB0JUkcpcHBFws9QYcv673l0=
github.com/bevzzz/nb/extension/extra/goldmark-jupyter v0.0.0-20240131001330-e69229bd9da4/go.mod h1:Sv6EeiZd/9xiAJttSrXKS0z36/P0x5yYV30gt9bgDE4=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
@ -1606,6 +1612,8 @@ github.com/rivo/uniseg v0.4.6 h1:Sovz9sDSwbOz9tgUy8JpT+KgCkPYJEN/oYzlJiYTNLg=
github.com/rivo/uniseg v0.4.6/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rjeczalik/notify v0.9.3 h1:6rJAzHTGKXGj76sbRgDiDcYj/HniypXmSJo1SWakZeY=
github.com/rjeczalik/notify v0.9.3/go.mod h1:gF3zSOrafR9DQEWSE8TjfI9NkooDxbyT4UgRGKZA0lc=
github.com/robert-nix/ansihtml v1.0.1 h1:VTiyQ6/+AxSJoSSLsMecnkh8i0ZqOEdiRl/odOc64fc=
github.com/robert-nix/ansihtml v1.0.1/go.mod h1:CJwclxYaTPc2RfcxtanEACsYuTksh4yDXcNeHHKZINE=
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=

View File

@ -0,0 +1,28 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
load("//dev:go_defs.bzl", "go_test")
go_library(
name = "htmlutil",
srcs = [
"policy.go",
"syntax.go",
],
importpath = "github.com/sourcegraph/sourcegraph/internal/htmlutil",
visibility = ["//:__subpackages__"],
deps = [
"@com_github_alecthomas_chroma_v2//:chroma",
"@com_github_alecthomas_chroma_v2//formatters/html",
"@com_github_microcosm_cc_bluemonday//:bluemonday",
],
)
go_test(
name = "htmlutil_test",
srcs = ["htmlutil_test.go"],
deps = [
":htmlutil",
"@com_github_alecthomas_chroma_v2//:chroma",
"@com_github_google_go_cmp//cmp",
"@com_github_stretchr_testify//require",
],
)

View File

@ -0,0 +1,160 @@
package htmlutil_test
import (
"strings"
"testing"
"github.com/alecthomas/chroma/v2"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/require"
"github.com/sourcegraph/sourcegraph/internal/htmlutil"
)
// TestSanitizationPolicy feeds small HTML snippets through the htmlutil
// sanitizers and checks which elements and attributes survive.
//
// Every case is run through Sanitize, SanitizeBytes and SanitizeReader so that
// the three wrappers are also verified to produce identical output.
func TestSanitizationPolicy(t *testing.T) {
	// assertHTML fails the test when the sanitized output differs from the
	// expectation. It is deliberately not called "cmp" so that it does not
	// shadow the imported go-cmp package it relies on.
	assertHTML := func(tb testing.TB, got, want string) {
		tb.Helper()
		if !cmp.Equal(want, got) {
			tb.Fatalf("html not sanitized correctly: (+want) (-got)\n+ %s\n- %s", want, got)
		}
	}

	for _, tt := range []struct {
		name      string
		inputHTML string
		want      string
	}{
		{
			name:      "a.name contains whitespace, letters, numbers, hyphens, or underscores",
			inputHTML: `<a name="safe link"/><a name="#illegal"/>`,
			want:      `<a name="safe link"/>`,
		},
		{
			name:      "only a.rel must be nofollow",
			inputHTML: `<a rel="nofollow"/><a rel="illegal"/>`,
			want:      `<a rel="nofollow"/>`,
		},
		{
			name:      "a.class must be anchor",
			inputHTML: `<a class="anchor"/><a class="illegal"/>`,
			want:      `<a class="anchor"/>`,
		},
		{
			name:      "a.aria-hidden must be true",
			inputHTML: `<a aria-hidden="true"/><a aria-hidden="false"/>`,
			want:      `<a aria-hidden="true"/>`,
		},
		{
			name:      "input.type must be checkbox",
			inputHTML: `<input type="checkbox"/><input type="number"/>`,
			want:      `<input type="checkbox"/>`,
		},
		{
			name:      "input.checked if true must be either empty or have no value",
			inputHTML: `<input checked=""/><input checked/><input checked="checked"/><input checked="true"/>`,
			want:      `<input checked=""/><input checked=""/>`,
		},
		{
			name:      "input.disabled if true must be either empty or have no value",
			inputHTML: `<input disabled=""/><input disabled/><input disabled="disabled"/><input disabled="true"/>`,
			want:      `<input disabled=""/><input disabled=""/>`,
		},
		{
			name:      "pre.class is either chroma or starts with chroma-",
			inputHTML: `<pre class="chroma"/><pre class="chroma-c1"/><pre class="illegal">Keep me</pre>`,
			want:      `<pre class="chroma"/><pre class="chroma-c1"/><pre>Keep me</pre>`, // here we just drop the illegal class
		},
		{
			name:      "code.class is either chroma or starts with chroma-",
			inputHTML: `<code class="chroma"/><code class="chroma-c1"/><code class="illegal">Keep me</code>`,
			want:      `<code class="chroma"/><code class="chroma-c1"/><code>Keep me</code>`, // here we just drop the illegal class
		},
		{
			name:      "span.class can be chroma or start with chroma-",
			inputHTML: `<span class="chroma"/><span class="chroma-c1"/><span class="illegal">Keep me</span>`,
			want:      `<span class="chroma"/><span class="chroma-c1"/><span>Keep me</span>`, // here we just drop the illegal class
		},
		{
			name:      "span.class can start with ansi-",
			inputHTML: `<span class="ansi-red"/><span class="illegal">Keep me</span>`,
			want:      `<span class="ansi-red"/><span>Keep me</span>`, // here we just drop the illegal class
		},
		{
			name:      "align is only allowed in img and p elements",
			inputHTML: `<img align="top"/><p align="left"/><div align="right"></div>`,
			want:      `<img align="top"/><p align="left"/><div></div>`, // here we just drop the illegal attribute
		},
		{
			name:      "picture is only allowed sans-attributes",
			inputHTML: `<picture class="good-one" width="900" src="example.com/pic"/>`,
			want:      `<picture/>`,
		},
		{
			name:      "allowed video attributes",
			inputHTML: `<video src="example.com/video.mp4" poster="example.com/thumbnail.png" width="250" height="250" playsinline muted autoplay loop controls />`,
			want:      `<video src="example.com/video.mp4" poster="example.com/thumbnail.png" width="250" height="250" playsinline="" muted="" autoplay="" loop="" controls=""/>`,
		},
		{
			name:      "allowed track attributes",
			inputHTML: `<track src="example.com/track.vtt" kind="subtitles" srclang="en" default label="High Speed Dirt" />`,
			want:      `<track src="example.com/track.vtt" kind="subtitles" srclang="en" default="" label="High Speed Dirt"/>`,
		},
		{
			name:      "allowed source attributes",
			inputHTML: `<source srcset="logo-wide.png" src="example.com/logo.png" type="image/png" media="{min-width: 800px}" width="600" height="900" sizes="1x,2x"/>`,
			want:      `<source srcset="logo-wide.png" src="example.com/logo.png" type="image/png" media="{min-width: 800px}" width="600" height="900" sizes="1x,2x"/>`,
		},
		{
			name:      "parseable fully-qualified links get target=_blank",
			inputHTML: `<a href="https://example.com"/><area href="http://example.com"/>`,
			want:      `<a href="https://example.com" rel="nofollow noopener" target="_blank"/><area href="http://example.com" rel="nofollow"/>`,
		},
	} {
		// htmlutil should enforce a set of sanitization rules to make sure the generated HTML is safe to render.
		// We also want to make sure all 3 wrapper functions produce identical results.
		t.Run(tt.name, func(t *testing.T) {
			t.Run("Sanitize", func(t *testing.T) {
				assertHTML(t, htmlutil.Sanitize(tt.inputHTML), tt.want)
			})

			t.Run("SanitizeBytes", func(t *testing.T) {
				b := htmlutil.SanitizeBytes([]byte(tt.inputHTML))
				assertHTML(t, string(b), tt.want)
			})

			t.Run("SanitizeReader", func(t *testing.T) {
				r := strings.NewReader(tt.inputHTML)
				assertHTML(t, htmlutil.SanitizeReader(r).String(), tt.want)
			})
		})
	}
}
// TestPatchChromaTypes verifies the class names in chroma.StandardTypes after
// htmlutil.SyntaxHighlightingOptions has been called.
func TestPatchChromaTypes(t *testing.T) {
	// The second pass asserts that the types are patched once, even if
	// SyntaxHighlightingOptions is called multiple times.
	for call := 0; call < 2; call++ {
		_ = htmlutil.SyntaxHighlightingOptions()
		checkChromaTypes(t)
	}
}
// checkChromaTypes walks chroma.StandardTypes and requires that every class
// name starts with "chroma-", with one exception: the entry for
// chroma.PreWrapper must not carry that prefix.
func checkChromaTypes(tb testing.TB) {
	tb.Helper()

	const prefix = "chroma-"
	for tokenType, class := range chroma.StandardTypes {
		prefixed := strings.HasPrefix(class, prefix)
		if tokenType == chroma.PreWrapper {
			require.False(tb, prefixed, "chroma.PreWrapper should not have a custom prefix, got %s", class)
			continue
		}
		require.True(tb, prefixed, "type %s should have %q prefix, got %q", tokenType, prefix, class)
	}
}

View File

@ -0,0 +1,56 @@
// Package htmlutil provides common utils for working with user-generated HTML content,
// such as Markdown or Jupyter notebook conversions:
//
// - sanitization policy (bluemonday)
package htmlutil
import (
"bytes"
"io"
"regexp" // nolint // required for bluemonday API
"sync"
"github.com/microcosm-cc/bluemonday"
)
var (
	// chromaOnly accepts a class attribute that is exactly "chroma" or that
	// starts with a "chroma-" prefixed class name.
	//
	// Both alternatives sit inside a single group so the leading ^ applies to
	// each of them. Written as `^(?:chroma-…)|chroma$` the alternation binds
	// looser than the anchors, i.e. (^chroma-…)|(chroma$), and any value that
	// merely ends in "chroma" (for example "evil chroma") would be accepted.
	chromaOnly = regexp.MustCompile(`^(?:chroma-[a-zA-Z0-9\-]+|chroma$)`)

	// chromaOrAnsi is chromaOnly extended with the "ansi-" prefix; a bare
	// "ansi" is not accepted. The same grouping rule applies.
	chromaOrAnsi = regexp.MustCompile(`^(?:(?:chroma|ansi)-[a-zA-Z0-9\-]+|chroma$)`)
)
// policy builds the shared HTML sanitization policy.
//
// It is wrapped in sync.OnceValue, so the policy is constructed on the first
// call and the same *bluemonday.Policy is returned on every later call. The
// policy starts from bluemonday.UGCPolicy() and adds the allowances below.
var policy = sync.OnceValue(func() *bluemonday.Policy {
p := bluemonday.UGCPolicy()
// <a>: name is checked against bluemonday.SpaceSeparatedTokens; rel, class and
// aria-hidden are only kept when they equal "nofollow", "anchor" and "true".
p.AllowAttrs("name").Matching(bluemonday.SpaceSeparatedTokens).OnElements("a")
p.AllowAttrs("rel").Matching(regexp.MustCompile(`^nofollow$`)).OnElements("a")
p.AllowAttrs("class").Matching(regexp.MustCompile(`^anchor$`)).OnElements("a")
p.AllowAttrs("aria-hidden").Matching(regexp.MustCompile(`^true$`)).OnElements("a")
// <input>: only type="checkbox"; checked/disabled are kept only with an empty value.
p.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
p.AllowAttrs("checked", "disabled").Matching(regexp.MustCompile(`^$`)).OnElements("input")
// Syntax-highlighting classes: chroma classes on <pre>/<code>, chroma or ansi
// classes on <span>.
p.AllowAttrs("class").Matching(chromaOnly).OnElements("pre", "code")
p.AllowAttrs("class").Matching(chromaOrAnsi).OnElements("span")
// <div> classes must start with a "jp-" class name.
// NOTE(review): this pattern has no end anchor, so whatever follows the first
// jp- class name is not validated — presumably to allow several classes; confirm.
p.AllowAttrs("class").Matching(regexp.MustCompile(`^jp-[a-zA-Z0-9\-]+`)).OnElements("div")
p.AllowAttrs("start").OnElements("ol")
p.AllowAttrs("align").OnElements("img", "p")
// Media elements and the attributes allowed on <source>, <video> and <track>.
// No attributes are allowed here for <picture>.
p.AllowElements("picture", "video", "track", "source")
p.AllowAttrs("srcset", "src", "type", "media", "width", "height", "sizes").OnElements("source")
p.AllowAttrs("playsinline", "muted", "autoplay", "loop", "controls", "width", "height", "poster", "src").OnElements("video")
p.AllowAttrs("src", "kind", "srclang", "default", "label").OnElements("track")
p.AddTargetBlankToFullyQualifiedLinks(true)
return p
})
// Sanitize runs the HTML string s through the package's shared sanitization
// policy and returns the sanitized string.
func Sanitize(s string) string {
return policy().Sanitize(s)
}
// SanitizeBytes runs the raw HTML bytes b through the package's shared
// sanitization policy and returns the sanitized bytes.
func SanitizeBytes(b []byte) []byte {
return policy().SanitizeBytes(b)
}
// SanitizeReader reads HTML from r, runs it through the package's shared
// sanitization policy and returns the sanitized output in a *bytes.Buffer.
func SanitizeReader(r io.Reader) *bytes.Buffer {
return policy().SanitizeReader(r)
}

View File

@ -0,0 +1,36 @@
package htmlutil
import (
"fmt"
"sync"
"github.com/alecthomas/chroma/v2"
"github.com/alecthomas/chroma/v2/formatters/html"
)
// patchChromaTypes replaces chroma.StandardTypes with a copy in which every
// class name except the one for chroma.PreWrapper gets a "chroma-" prefix.
//
// It is wrapped in sync.OnceFunc so the global is not modified during package
// import: the swap happens when chroma syntax highlighting is used for the
// first time, and later calls do nothing.
var patchChromaTypes = sync.OnceFunc(func() {
	prefixed := make(map[chroma.TokenType]string, len(chroma.StandardTypes))
	for tokenType, class := range chroma.StandardTypes {
		if tokenType != chroma.PreWrapper {
			class = fmt.Sprintf("chroma-%s", class)
		}
		prefixed[tokenType] = class
	}
	chroma.StandardTypes = prefixed
})
// SyntaxHighlightingOptions returns the options used to configure the chroma
// HTML formatter: class-based output enabled, line numbers disabled.
//
// It calls patchChromaTypes first, so the "chroma-" class prefixes are in
// place before the options are used.
func SyntaxHighlightingOptions() []html.Option {
	patchChromaTypes()

	withClasses := html.WithClasses(true)
	withoutLineNumbers := html.WithLineNumbers(false)
	return []html.Option{withClasses, withoutLineNumbers}
}

View File

@ -0,0 +1,19 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "ipynb",
srcs = ["ipynb.go"],
importpath = "github.com/sourcegraph/sourcegraph/internal/ipynb",
visibility = ["//:__subpackages__"],
deps = [
"//internal/htmlutil",
"//internal/markdown",
"//lib/errors",
"@com_github_bevzzz_nb//:nb",
"@com_github_bevzzz_nb//extension",
"@com_github_bevzzz_nb//extension/adapter",
"@com_github_bevzzz_nb_extension_extra_goldmark_jupyter//:goldmark-jupyter",
"@com_github_bevzzz_nb_synth//:nb-synth",
"@com_github_robert_nix_ansihtml//:ansihtml",
],
)

48
internal/ipynb/ipynb.go Normal file
View File

@ -0,0 +1,48 @@
package ipynb
import (
"bytes"
"sync"
"github.com/bevzzz/nb"
synth "github.com/bevzzz/nb-synth"
"github.com/bevzzz/nb/extension"
"github.com/bevzzz/nb/extension/adapter"
jupyter "github.com/bevzzz/nb/extension/extra/goldmark-jupyter"
"github.com/robert-nix/ansihtml"
"github.com/sourcegraph/sourcegraph/internal/htmlutil"
"github.com/sourcegraph/sourcegraph/internal/markdown"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
// Render converts the contents of a Jupyter Notebook file (.ipynb) to HTML and
// sanitizes the result, so the returned markup is safe to render anywhere.
//
// If the notebook cannot be converted, Render returns an empty string and an
// error whose message is prefixed with "ipynb.Render".
func Render(content string) (string, error) {
	var rendered bytes.Buffer

	err := notebook().Convert(&rendered, []byte(content))
	if err != nil {
		return "", errors.Newf("ipynb.Render: %w", err)
	}

	// The converter output is never returned directly; it always goes through
	// the htmlutil sanitizer first.
	sanitized := htmlutil.SanitizeReader(&rendered)
	return sanitized.String(), nil
}
// notebook returns the notebook converter used by Render. It is wrapped in
// sync.OnceValue, so the converter is built on the first call and the same
// value is returned on every later call.
var notebook = sync.OnceValue(func() *nb.Notebook {
	// Goldmark extension built on the preconfigured Markdown renderer from the
	// markdown package.
	markdownExt := jupyter.Goldmark(markdown.Goldmark())

	// Syntax highlighting configured with the formatter options from htmlutil.
	highlightingExt := synth.NewHighlighting(
		synth.WithFormatOptions(htmlutil.SyntaxHighlightingOptions()...),
	)

	// Stream extension whose ANSI-to-HTML adapter is backed by ansi2html.
	streamExt := extension.NewStream(adapter.AnsiHtml(ansi2html))

	return nb.New(nb.WithExtensions(markdownExt, highlightingExt, streamExt))
})
// ansi2html passes b to ansihtml.ConvertToHTMLWithClasses using the "ansi-"
// class prefix and returns the converted bytes.
func ansi2html(b []byte) []byte {
	const classPrefix = "ansi-"
	return ansihtml.ConvertToHTMLWithClasses(b, classPrefix, false)
}

View File

@ -6,9 +6,9 @@ go_library(
importpath = "github.com/sourcegraph/sourcegraph/internal/markdown",
visibility = ["//:__subpackages__"],
deps = [
"@com_github_alecthomas_chroma_v2//:chroma",
"@com_github_alecthomas_chroma_v2//formatters/html",
"@com_github_microcosm_cc_bluemonday//:bluemonday",
"//internal/htmlutil",
"//lib/errors",
"@com_github_bevzzz_nb_extension_extra_goldmark_jupyter//:goldmark-jupyter",
"@com_github_yuin_goldmark//:goldmark",
"@com_github_yuin_goldmark//ast",
"@com_github_yuin_goldmark//extension",

View File

@ -1,14 +1,10 @@
package markdown
import (
"fmt"
"regexp" //nolint:depguard // bluemonday requires this pkg
"strings"
"bytes"
"sync"
"github.com/alecthomas/chroma/v2"
chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
"github.com/microcosm-cc/bluemonday"
jupyter "github.com/bevzzz/nb/extension/extra/goldmark-jupyter"
"github.com/yuin/goldmark"
highlighting "github.com/yuin/goldmark-highlighting/v2"
"github.com/yuin/goldmark/ast"
@ -17,77 +13,51 @@ import (
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
var (
once sync.Once
policy *bluemonday.Policy
renderer goldmark.Markdown
"github.com/sourcegraph/sourcegraph/internal/htmlutil"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
// Render renders Markdown content into sanitized HTML that is safe to render anywhere.
//
// The content is converted to HTML into a buffer and the buffer is sanitized
// before being returned. If the conversion fails, Render returns an empty
// string together with the error.
func Render(content string) (string, error) {
// One-time setup guarded by once: sanitization policy, link attribute filter,
// chroma class names, and the goldmark renderer.
once.Do(func() {
// Start from bluemonday's UGC policy, then allow the additional attributes
// and elements listed below.
policy = bluemonday.UGCPolicy()
policy.AllowAttrs("name").Matching(bluemonday.SpaceSeparatedTokens).OnElements("a")
policy.AllowAttrs("rel").Matching(regexp.MustCompile(`^nofollow$`)).OnElements("a")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^anchor$`)).OnElements("a")
policy.AllowAttrs("aria-hidden").Matching(regexp.MustCompile(`^true$`)).OnElements("a")
policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
policy.AllowAttrs("checked", "disabled").Matching(regexp.MustCompile(`^$`)).OnElements("input")
// NOTE(review): the alternation binds looser than the anchors, so this pattern
// reads as `^(?:chroma-...)` OR `chroma$` rather than one fully anchored
// match — confirm that is intended.
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(?:chroma-[a-zA-Z0-9\-]+)|chroma$`)).OnElements("pre", "code", "span")
policy.AllowAttrs("align").OnElements("img", "p")
policy.AllowElements("picture", "video", "track", "source")
policy.AllowAttrs("srcset", "src", "type", "media", "width", "height", "sizes").OnElements("source")
policy.AllowAttrs("playsinline", "muted", "autoplay", "loop", "controls", "width", "height", "poster", "src").OnElements("video")
policy.AllowAttrs("src", "kind", "srclang", "default", "label").OnElements("track")
policy.AllowAttrs("start").OnElements("ol")
policy.AddTargetBlankToFullyQualifiedLinks(true)
// Register the aria-hidden and name attributes with the HTML renderer's
// link attribute filter.
html.LinkAttributeFilter.Add([]byte("aria-hidden"))
html.LinkAttributeFilter.Add([]byte("name"))
// Replace chroma.StandardTypes with a copy where every class name except the
// PreWrapper one is prefixed with "chroma-".
origTypes := chroma.StandardTypes
sourcegraphTypes := map[chroma.TokenType]string{}
for k, v := range origTypes {
if k == chroma.PreWrapper {
sourcegraphTypes[k] = v
} else {
sourcegraphTypes[k] = fmt.Sprintf("chroma-%s", v)
}
}
chroma.StandardTypes = sourcegraphTypes
// Build the renderer: GFM plus class-based syntax highlighting, auto heading
// IDs, and the mdLinkHeaders AST transformer.
renderer = goldmark.New(
goldmark.WithExtensions(
extension.GFM,
highlighting.NewHighlighting(
highlighting.WithFormatOptions(
chromahtml.WithClasses(true),
chromahtml.WithLineNumbers(false),
),
),
),
goldmark.WithParserOptions(
parser.WithAutoHeadingID(),
parser.WithASTTransformers(util.Prioritized(mdTransformFunc(mdLinkHeaders), 1)),
),
goldmark.WithRendererOptions(
// HTML sanitization is handled by bluemonday
html.WithUnsafe(),
),
)
})
var buf strings.Builder
if err := renderer.Convert([]byte(content), &buf); err != nil {
return "", err
// NOTE(review): buf is declared a second time here (strings.Builder above,
// bytes.Buffer below) and there are two return statements at the end — this
// looks like two versions of the function interleaved; confirm which is current.
var buf bytes.Buffer
if err := Goldmark().Convert([]byte(content), &buf); err != nil {
return "", errors.Newf("markdown.Render: %w", err)
}
return policy.Sanitize(buf.String()), nil
return htmlutil.SanitizeReader(&buf).String(), nil
}
// Goldmark returns a preconfigured Markdown renderer. It is wrapped in
// sync.OnceValue, so the renderer is built on the first call and the same
// value is returned on every later call.
//
// The renderer emits raw HTML (html.WithUnsafe), so callers are expected to
// sanitize its output.
var Goldmark = sync.OnceValue(func() goldmark.Markdown {
	// Register the extra link attributes with the HTML renderer's link
	// attribute filter.
	for _, attr := range []string{"aria-hidden", "name"} {
		html.LinkAttributeFilter.Add([]byte(attr))
	}

	syntaxHighlighting := highlighting.NewHighlighting(
		highlighting.WithFormatOptions(htmlutil.SyntaxHighlightingOptions()...),
	)
	extensions := goldmark.WithExtensions(
		extension.GFM,
		syntaxHighlighting,
		jupyter.Attachments(),
	)

	parserOptions := goldmark.WithParserOptions(
		parser.WithAutoHeadingID(),
		parser.WithASTTransformers(util.Prioritized(mdTransformFunc(mdLinkHeaders), 1)),
	)

	// HTML sanitization is handled by htmlutil
	rendererOptions := goldmark.WithRendererOptions(html.WithUnsafe())

	return goldmark.New(extensions, parserOptions, rendererOptions)
})
// mdTransformFunc is a function type that can be used as a
// parser.ASTTransformer through its Transform method.
type mdTransformFunc func(*ast.Document, text.Reader, parser.Context)

// Compile-time check that *mdTransformFunc satisfies parser.ASTTransformer.
var _ parser.ASTTransformer = (*mdTransformFunc)(nil)

// Transform implements parser.ASTTransformer by calling f with the same
// arguments.
func (f mdTransformFunc) Transform(doc *ast.Document, src text.Reader, ctx parser.Context) {
	f(doc, src, ctx)
}