internal cmd: GHE repo feeder (#10892)

* internal cmd: GHE repo feeder

* feeder.db and split into files

* org generator

* add loggers

* rate limiter

* complete flow

* save orgs in db

* retry for push and clean up cloned dirs

* more flags instead of hardcoded

* more flags

* metrics and timeouts

* upping timeout

* fix merge conflict

* better metrics

* inc error metric

* process failed on subsequent runs

* custom err

* resume metrics

* metric fix

* clean up cloned dir in all cases

* fix terminal progress bar calculation

* ghe feeder dashboard

* accommodate dave input

* accept dirs when computing work

* rate limit cloning to avoid triggering an abuse detection mechanism

* ability to skip ahead in input and record error type

* golangci-lint

* prettier over dashboard json

* documentation for worker

* pump documentation

* documentation for sqlite feeder DB

* README

* README 2

* doc fixes

* readme prettier

* Update internal/cmd/ghe-feeder/progress.go

Co-authored-by: ᴜɴᴋɴᴡᴏɴ <joe@sourcegraph.com>

* code review joe

* readme explain resume and dashboard

* prettier readme

Co-authored-by: Dave Try <dave@sourcegraph.com>
Co-authored-by: ᴜɴᴋɴᴡᴏɴ <joe@sourcegraph.com>
This commit is contained in:
uwedeportivo 2020-05-28 12:44:07 -07:00 committed by GitHub
parent cae80e4f9b
commit de25b2f2ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 2529 additions and 0 deletions

1
.github/CODEOWNERS vendored
View File

@ -35,6 +35,7 @@
/cmd/frontend/internal/httpapi @slimsag
/cmd/frontend/types @slimsag
/cmd/frontend/hooks @slimsag
/internal/cmd/ghe-feeder @uwedeportivo
/internal/comby @rvantonder
/internal/db/ @keegancsmith
/internal/processrestart @slimsag @keegancsmith

2
go.mod
View File

@ -43,6 +43,7 @@ require (
github.com/google/go-cmp v0.4.1
github.com/google/go-github v17.0.0+incompatible
github.com/google/go-github/v28 v28.1.1
github.com/google/go-github/v31 v31.0.0
github.com/google/go-querystring v1.0.0
github.com/google/uuid v1.1.1
github.com/google/zoekt v0.0.0-20200324103759-172db892e9a2
@ -101,6 +102,7 @@ require (
github.com/russellhaering/gosaml2 v0.4.0
github.com/russellhaering/goxmldsig v0.0.0-20180430223755-7acd5e4a6ef7
github.com/russross/blackfriday v2.0.0+incompatible // indirect
github.com/schollz/progressbar/v3 v3.3.3
github.com/segmentio/fasthash v1.0.1
github.com/sergi/go-diff v1.1.0
github.com/shirou/gopsutil v2.20.3+incompatible // indirect

9
go.sum
View File

@ -430,6 +430,8 @@ github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+u
github.com/google/go-github/v27 v27.0.6/go.mod h1:/0Gr8pJ55COkmv+S/yPKCczSkUPIM/LnFyubufRNIS0=
github.com/google/go-github/v28 v28.1.1 h1:kORf5ekX5qwXO2mGzXXOjMe/g6ap8ahVe0sBEulhSxo=
github.com/google/go-github/v28 v28.1.1/go.mod h1:bsqJWQX05omyWVmc00nEUql9mhQyv38lDZ8kPZcQVoM=
github.com/google/go-github/v31 v31.0.0 h1:JJUxlP9lFK+ziXKimTCprajMApV1ecWD4NB6CCb0plo=
github.com/google/go-github/v31 v31.0.0/go.mod h1:NQPZol8/1sMoWYGN2yaALIBytu17gAWfhbweiEed3pM=
github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk=
github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
@ -578,6 +580,7 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 h1:iQTw/8FWTuc7uiaSepXwyf3o52HaUYcV+Tu66S3F5GA=
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8=
github.com/karlseguin/expect v1.0.6 h1:9LkCpHKLd1bEXFMIXwhbS1MJeET5sLI7huPMYNeDy1c=
@ -666,6 +669,8 @@ github.com/mattn/go-isatty v0.0.11 h1:FxPOTFNqGkuDUGi3H/qkUbQO4ZiBa2brKq5r0l8TGe
github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE=
github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-sqlite3 v1.9.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/mattn/go-sqlite3 v1.10.0 h1:jbhqpg7tQe4SupckyijYiy0mJJ/pRyHvXf7JdWK860o=
github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
@ -680,6 +685,8 @@ github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b h1:j7+1HpAFS1zy5+Q4qx1f
github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
github.com/microcosm-cc/bluemonday v1.0.2 h1:5lPfLTTAvAbtS0VqT+94yOtFnGfUWYyx0+iToC3Os3s=
github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/go-ps v0.0.0-20190716172923-621e5597135b/go.mod h1:r1VsdOzOPt1ZSrGZWFoNhsAedKnEd6r9Np1+5blZCWk=
@ -814,6 +821,8 @@ github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR
github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/schollz/progressbar/v3 v3.3.3 h1:woop83iT9IwNMhawXBgHTlAAOwUj4Nnr1RvX2LkkJTs=
github.com/schollz/progressbar/v3 v3.3.3/go.mod h1:N/820QRS3ua9DhrVnLShsNgAEKNYFd89Cf5syXfqeyQ=
github.com/securego/gosec v0.0.0-20200103095621-79fbf3af8d83/go.mod h1:vvbZ2Ae7AzSq3/kywjUDxSNq2SJ27RxCz2un0H3ePqE=
github.com/securego/gosec v0.0.0-20200302134848-c998389da2ac/go.mod h1:NurAFZsWJAEZjogSwdVPlHkOZB3DOAU7gsPP8VFZCHc=
github.com/segmentio/fasthash v1.0.1 h1:U+9f+rh5LxMOquTrEKNw1Z3JgsBlms9QoReNfUo+fws=

View File

@ -0,0 +1,129 @@
# GHE feeder tool
Feeds repositories into a destination GHE instance. Inputs are files with owner/repo strings on each line and/or
directories containing such files (see below for details about the expected input).
The https://github.com/owner/repo repositories specified on the input lines are cloned and then pushed to a destination
GHE instance. Every once in a while new orgs are created and the repos are added to those orgs.
## Usage
```shell script
ghe-feeder -admin milton -token <xxxxxxx> -scratchDir clones -numWorkers 20 -limit 100000 -baseURL https://ghe.sgdev.org/api/v3 dir1 file2 file3 dir4
```
```
ghe-feeder -help
-admin string
(required) destination GHE admin name
-apiCallsPerSec float
how many API calls per sec to destination GHE (default 100)
-baseURL string
(required) base URL of GHE instance to feed
-cloneRepoTimeout duration
how long to wait for a repo to clone (default 3m0s)
-help
Show help
-limit int
limit processing to this many repos (for debugging) (default 9223372036854775807)
-logfile string
path to a log file (default "feeder.log")
-numCloningAttempts int
number of cloning attempts before giving up (default 5)
-numSimultaneousClones int
number of simultaneous github.com clones (default 10)
-numSimultaneousPushes int
number of simultaneous GHE pushes (default 10)
-numWorkers int
number of workers (default 20)
-progress string
path to a sqlite DB recording the progress made in the feeder (created if it doesn't exist) (default "feeder.db")
-scratchDir string
scratch dir where to temporarily clone repositories
-skip int
skip this many lines from input
-token string
(required) GitHub personal access token for the destination GHE instance
-uploadURL string
upload URL of GHE instance to feed
```
## Inputs
Inputs are command line arguments consisting of files and directories. Files need to end in .json, .txt or .csv.
Directories are traversed recursively for such files.
If the input file is a .json or .txt file it expects an owner/repo string per line (and nothing else). For example:
```
"FrankBGao/try"
"NicoAN42/nav_tool"
"hcjk/AttendantTips"
"orsenthil/cpython-hg-git-test-4"
"AndreKR/one-click-hugo-cms"
"H-Len/counting-up-loop"
"PyladiesFortaleza/pyladiesfortaleza.github.io"
"vikingsc2007/VFramework"
"gitter-badger/Caelan.Frameworks.DAL"
"samma89/BabylonAuth"
"esso23/Android-wamp-client"
"jamesmartin/dotfiles"
"OatmealTokyo/hello-world"
"ernrico21/ing2"
"rto07/Checkpoint-3-Practice"
"obj1-unahur-2018s2/parcial1-turno-manana-MagaliDumit"
"BitterPepper/StoreExample"
"dnilsson1/React_Dashboard"
"direwolf-github/my-app-ec4f431a"
```
Lines can be quoted or not (quotes will be stripped).
If the input file is a .csv then owner/Repo is expected in position 6 (1-based counting) and owner is separated from repo
by the first dash. For example:
```
2019-05-23 15:22:44 -0700,4,Organization,sourcegraph,1,sourcegraph-sirupsen-logrus,public,0 Bytes,0,0,false,false
2019-05-23 15:22:47 -0700,4,Organization,sourcegraph,2,sourcegraph-inconshreveable-ngrok,public,0 Bytes,0,0,false,false
2019-05-23 15:22:47 -0700,4,Organization,sourcegraph,3,sourcegraph-spf13-cobra,public,0 Bytes,0,0,false,false
2019-05-23 15:24:11 -0700,4,Organization,sourcegraph,4,joewalnes-websocketd,public,1.01 MB,1039,0,false,false
2019-05-23 15:24:11 -0700,4,Organization,sourcegraph,5,sirupsen-logrus,public,804 KB,804,0,false,false
2019-05-23 15:24:11 -0700,4,Organization,sourcegraph,6,inconshreveable-ngrok,public,664 KB,664,0,false,false
2019-05-23 15:24:11 -0700,4,Organization,sourcegraph,7,urfave-cli,public,1.17 MB,1194,0,false,false
2019-05-23 15:24:12 -0700,4,Organization,sourcegraph,8,spf13-cobra,public,1020 KB,1015,0,false,false
2019-05-23 15:24:16 -0700,4,Organization,sourcegraph,9,tsenart-vegeta,public,1.64 MB,1683,0,false,false
2019-05-23 15:24:17 -0700,4,Organization,sourcegraph,10,gin-gonic-gin,public,1.87 MB,1917,0,false,false
2019-05-23 15:24:18 -0700,4,Organization,sourcegraph,11,FiloSottile-mkcert,public,1.74 MB,1778,0,false,false
```
The CSV files are from reports in the GHE instance which explains the field format.
## Resuming from previous runs and other input controls
The `ghe-feeder` command keeps track of processed ownerRepos strings in a sqlite DB
(by default called `feeder.db` in the current directory).
It records in a table called `repos` all the ownerRepos with their success or failure status and for failure the errType.
It also records the orgs it creates in an additional table called `orgs`.
The errType in the `repos` table has these possible values:
- `api` for errors using the GHE API talking to the destination GHE (creating repos and orgs)
- `clone` for errors while cloning from github.com.
- `push` for errors doing a git push command with the destination GHE as a remote
- `unknown` for all other errors
The `feeder.db` can be used to replay and skip the already done ownerRepos. Note that it also skips ownerRepos with
errType == `clone` (the assumption is that those are either private repos for which no credentials are available or
404 for repos that got deleted since).
In addition to the `feeder.db` one can control which inputs get processed by specifying a limit on the number of lines
being processed and also by specifying how many lines to skip before starting to process. Make sure you use the same
inputs declared in the same way if you want to skip this way.
## Monitoring progress
The `ghe-feeder` command shows progress in the terminal with a progress bar and
writes into a log file (by default `feeder.log` in the current directory). In addition to that it runs a webserver and
exports metrics. By pointing a prometheus/grafana pair at it one can see progress metrics, successes vs failures etc.
The provided dashboard `ghe-feeder-dashboard.json` can be used for that once imported into Grafana.

View File

@ -0,0 +1,625 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 3,
"links": [],
"panels": [
{
"datasource": null,
"description": "",
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 13,
"options": {
"orientation": "auto",
"reduceOptions": {
"calcs": ["mean"],
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "7.0.0",
"targets": [
{
"expr": "sum(ghe_feeder_processed)/ (sum(ghe_feeder_remaining_work) + sum(ghe_feeder_processed)) * 100",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "percent done",
"type": "gauge"
},
{
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 0
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 8,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["last"],
"values": false
}
},
"pluginVersion": "7.0.0",
"targets": [
{
"expr": "sum(ghe_feeder_failed)",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "failed",
"type": "stat"
},
{
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 6,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["last"],
"values": false
}
},
"pluginVersion": "7.0.0",
"targets": [
{
"expr": "sum (ghe_feeder_succeeded)",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "succeeded",
"type": "stat"
},
{
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["last"],
"values": false
}
},
"pluginVersion": "7.0.0",
"targets": [
{
"expr": "ghe_feeder_remaining_work",
"interval": "",
"legendFormat": "remaining",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "remaining",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 16
},
"hiddenSeries": false,
"id": 9,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum (rate(ghe_feeder_succeeded[5m]))",
"interval": "",
"legendFormat": "succeeded",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "succeeded",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 16
},
"hiddenSeries": false,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (err_type) (rate(ghe_feeder_failed[5m]))",
"interval": "",
"legendFormat": "failed {{err_type}}",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "failed",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 25
},
"hiddenSeries": false,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum (rate(ghe_feeder_processed[5m]))",
"interval": "",
"legendFormat": "processed",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "processed",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 25
},
"hiddenSeries": false,
"id": 11,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (err_type) (rate(ghe_feeder_failed{err_type!=\"clone\"}[5m]))",
"interval": "",
"legendFormat": "failed {{err_type}}",
"refId": "B"
},
{
"expr": "sum (rate(ghe_feeder_succeeded[5m]))",
"interval": "",
"legendFormat": "succeeded",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "succeeded vs failed because of GHE",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": "10s",
"schemaVersion": 25,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {
"refresh_intervals": ["10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"]
},
"timezone": "",
"title": "GHE Feeder",
"uid": "dB3LtzzMk",
"version": 29
}

View File

@ -0,0 +1,235 @@
package main
import (
"context"
"flag"
"fmt"
"io/ioutil"
"log"
"math"
"net/http"
"net/url"
"os"
"os/signal"
"path/filepath"
"sync"
"time"
"github.com/inconshreveable/log15"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/schollz/progressbar/v3"
"golang.org/x/time/rate"
)
// Prometheus metrics describing the feeder's progress. They are created via
// promauto, and main serves them through promhttp.Handler() on /metrics.
var (
	// reposProcessedCounter counts processed repos, labelled by worker name.
	reposProcessedCounter = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "ghe_feeder_processed",
		Help: "The total number of processed repos (labels: worker)",
	}, []string{"worker"})

	// reposFailedCounter counts failed repos, labelled by worker name and by
	// the error type (clone, api, push or unknown).
	reposFailedCounter = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "ghe_feeder_failed",
		Help: "The total number of failed repos (labels: worker, err_type with values {clone, api, push, unknown})",
	}, []string{"worker", "err_type"})

	// reposSucceededCounter counts repos that were fed successfully.
	reposSucceededCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "ghe_feeder_succeeded",
		Help: "The total number of succeeded repos",
	})

	// reposAlreadyDoneCounter counts repos skipped because a previous run
	// already recorded them in the progress DB.
	reposAlreadyDoneCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "ghe_feeder_skipped",
		Help: "The total number of repos already done in previous runs (found in feeder.db)",
	})

	// remainingWorkGauge tracks how many repos from the input are still
	// waiting to be processed; main seeds it with the computed line count.
	remainingWorkGauge = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "ghe_feeder_remaining_work",
		Help: "The number of repos that still need to be processed from the specified input",
	})
)
// main parses the command line, sets up logging, the GHE client and the
// sqlite progress DB, computes the amount of outstanding work, then starts
// the workers and pumps the input into them. It serves Prometheus metrics on
// :2112/metrics while running and prints a summary once all work is done.
//
// Exit codes: 0 on success (or -help), 2 for invalid command line usage and
// 1 for any runtime setup or processing failure.
func main() {
	admin := flag.String("admin", "", "(required) destination GHE admin name")
	token := flag.String("token", os.Getenv("GITHUB_TOKEN"), "(required) GitHub personal access token for the destination GHE instance")
	progressFilepath := flag.String("progress", "feeder.db", "path to a sqlite DB recording the progress made in the feeder (created if it doesn't exist)")
	baseURL := flag.String("baseURL", "", "(required) base URL of GHE instance to feed")
	uploadURL := flag.String("uploadURL", "", "upload URL of GHE instance to feed")
	numWorkers := flag.Int("numWorkers", 20, "number of workers")
	scratchDir := flag.String("scratchDir", "", "scratch dir where to temporarily clone repositories")
	limitPump := flag.Int64("limit", math.MaxInt64, "limit processing to this many repos (for debugging)")
	skipNumLines := flag.Int64("skip", 0, "skip this many lines from input")
	logFilepath := flag.String("logfile", "feeder.log", "path to a log file")
	apiCallsPerSec := flag.Float64("apiCallsPerSec", 100.0, "how many API calls per sec to destination GHE")
	numSimultaneousPushes := flag.Int("numSimultaneousPushes", 10, "number of simultaneous GHE pushes")
	cloneRepoTimeout := flag.Duration("cloneRepoTimeout", time.Minute*3, "how long to wait for a repo to clone")
	numCloningAttempts := flag.Int("numCloningAttempts", 5, "number of cloning attempts before giving up")
	numSimultaneousClones := flag.Int("numSimultaneousClones", 10, "number of simultaneous github.com clones")
	help := flag.Bool("help", false, "Show help")

	flag.Parse()

	// Validate usage before touching the filesystem so that -help never
	// creates (or fails on) the log file.
	if *help {
		flag.PrintDefaults()
		os.Exit(0)
	}
	// Missing required flags is a usage error and must not look like success.
	if len(*baseURL) == 0 || len(*token) == 0 || len(*admin) == 0 {
		fmt.Fprintln(os.Stderr, "missing required flag(s): -admin, -baseURL and -token (or GITHUB_TOKEN) must all be set")
		flag.PrintDefaults()
		os.Exit(2)
	}
	// A negative count panics in wg.Add below, and with zero workers nothing
	// would ever receive from the work channel.
	if *numWorkers < 1 {
		fmt.Fprintln(os.Stderr, "-numWorkers must be at least 1")
		os.Exit(2)
	}

	logHandler, err := log15.FileHandler(*logFilepath, log15.LogfmtFormat())
	if err != nil {
		log.Fatal(err)
	}
	log15.Root().SetHandler(logHandler)

	// The upload URL defaults to the base URL when not given explicitly.
	if len(*uploadURL) == 0 {
		*uploadURL = *baseURL
	}

	if len(*scratchDir) == 0 {
		d, err := ioutil.TempDir("", "ghe-feeder")
		if err != nil {
			log15.Error("failed to create scratch dir", "error", err)
			os.Exit(1)
		}
		*scratchDir = d
	}

	u, err := url.Parse(*baseURL)
	if err != nil {
		log15.Error("failed to parse base URL", "baseURL", *baseURL, "error", err)
		os.Exit(1)
	}
	host := u.Host

	ctx := context.Background()
	gheClient, err := newGHEClient(ctx, *baseURL, *uploadURL, *token)
	if err != nil {
		log15.Error("failed to create GHE client", "error", err)
		os.Exit(1)
	}

	fdr, err := newFeederDB(*progressFilepath)
	if err != nil {
		log15.Error("failed to create sqlite DB", "path", *progressFilepath, "error", err)
		os.Exit(1)
	}

	// Indeterminate spinner while the total amount of work is computed.
	spinner := progressbar.Default(-1, "calculating work")
	numLines, err := numLinesTotal(*skipNumLines)
	if err != nil {
		log15.Error("failed to calculate outstanding work", "error", err)
		os.Exit(1)
	}
	_ = spinner.Finish()

	if numLines > *limitPump {
		numLines = *limitPump
	}

	if numLines == 0 {
		log15.Info("no work remaining in input")
		fmt.Println("no work remaining in input, exiting")
		os.Exit(0)
	}

	remainingWorkGauge.Set(float64(numLines))
	bar := progressbar.New64(numLines)

	work := make(chan string)

	prdc := &producer{
		remaining:    *limitPump,
		pipe:         work,
		fdr:          fdr,
		logger:       log15.New("source", "producer"),
		bar:          bar,
		skipNumLines: *skipNumLines,
	}

	var wg sync.WaitGroup

	wg.Add(*numWorkers)

	// trap Ctrl+C and call cancel on the context
	ctx, cancel := context.WithCancel(ctx)
	c := make(chan os.Signal, 1)
	signal.Notify(c, os.Interrupt)
	defer func() {
		signal.Stop(c)
		cancel()
	}()
	go func() {
		select {
		case <-c:
			cancel()
		case <-ctx.Done():
		}
	}()

	// Serve Prometheus metrics. A failure here (e.g. port already in use)
	// does not stop the feeder, but it is logged so missing metrics can be
	// diagnosed.
	go func() {
		http.Handle("/metrics", promhttp.Handler())
		if err := http.ListenAndServe(":2112", nil); err != nil {
			log15.Error("metrics server failed", "error", err)
		}
	}()

	// Shared across all workers: API rate limiter plus the push and clone
	// semaphores sized by their respective flags.
	rateLimiter := rate.NewLimiter(rate.Limit(*apiCallsPerSec), 100)
	pushSem := make(chan struct{}, *numSimultaneousPushes)
	cloneSem := make(chan struct{}, *numSimultaneousClones)

	var wkrs []*worker

	for i := 0; i < *numWorkers; i++ {
		name := fmt.Sprintf("worker-%d", i)
		// Each worker gets its own subdirectory of the scratch dir.
		wkrScratchDir := filepath.Join(*scratchDir, name)
		err := os.MkdirAll(wkrScratchDir, 0777)
		if err != nil {
			log15.Error("failed to create worker scratch dir", "scratchDir", wkrScratchDir, "error", err)
			os.Exit(1)
		}
		wkr := &worker{
			name:               name,
			client:             gheClient,
			index:              i,
			scratchDir:         wkrScratchDir,
			work:               work,
			wg:                 &wg,
			bar:                bar,
			fdr:                fdr,
			logger:             log15.New("source", name),
			rateLimiter:        rateLimiter,
			admin:              *admin,
			token:              *token,
			host:               host,
			pushSem:            pushSem,
			cloneSem:           cloneSem,
			cloneRepoTimeout:   *cloneRepoTimeout,
			numCloningAttempts: *numCloningAttempts,
		}
		wkrs = append(wkrs, wkr)
		go wkr.run(ctx)
	}

	err = prdc.pump(ctx)
	if err != nil {
		log15.Error("pump failed", "error", err)
		os.Exit(1)
	}
	// No more input: closing the channel lets the workers drain and finish.
	close(work)
	wg.Wait()
	_ = bar.Finish()

	s := stats(wkrs, prdc)

	fmt.Println(s)
	log15.Info(s)
}
// stats builds the end-of-run summary line. It adds up the per-worker
// success and failure counts, reports their sum as the processed total, and
// takes the skipped count from the producer's numAlreadyDone.
func stats(wkrs []*worker, prdc *producer) string {
	var (
		succeeded int64
		failed    int64
	)
	for i := range wkrs {
		succeeded += wkrs[i].numSucceeded
		failed += wkrs[i].numFailed
	}
	// Every processed repo either succeeded or failed.
	processed := succeeded + failed

	const summary = "\n\nDone: processed %d, succeeded: %d, failed: %d, skipped: %d\n"
	return fmt.Sprintf(summary, processed, succeeded, failed, prdc.numAlreadyDone)
}

View File

@ -0,0 +1,852 @@
package main
import (
"fmt"
"math/rand"
)
var (
left = [...]string{
"admiring",
"adoring",
"affectionate",
"agitated",
"amazing",
"angry",
"awesome",
"beautiful",
"blissful",
"bold",
"boring",
"brave",
"busy",
"charming",
"clever",
"cool",
"compassionate",
"competent",
"condescending",
"confident",
"cranky",
"crazy",
"dazzling",
"determined",
"distracted",
"dreamy",
"eager",
"ecstatic",
"elastic",
"elated",
"elegant",
"eloquent",
"epic",
"exciting",
"fervent",
"festive",
"flamboyant",
"focused",
"friendly",
"frosty",
"funny",
"gallant",
"gifted",
"goofy",
"gracious",
"great",
"happy",
"hardcore",
"heuristic",
"hopeful",
"hungry",
"infallible",
"inspiring",
"interesting",
"intelligent",
"jolly",
"jovial",
"keen",
"kind",
"laughing",
"loving",
"lucid",
"magical",
"mystifying",
"modest",
"musing",
"naughty",
"nervous",
"nice",
"nifty",
"nostalgic",
"objective",
"optimistic",
"peaceful",
"pedantic",
"pensive",
"practical",
"priceless",
"quirky",
"quizzical",
"recursing",
"relaxed",
"reverent",
"romantic",
"sad",
"serene",
"sharp",
"silly",
"sleepy",
"stoic",
"strange",
"stupefied",
"suspicious",
"sweet",
"tender",
"thirsty",
"trusting",
"unruffled",
"upbeat",
"vibrant",
"vigilant",
"vigorous",
"wizardly",
"wonderful",
"xenodochial",
"youthful",
"zealous",
"zen",
}
// Docker, starting from 0.7.x, generates names from notable scientists and hackers.
// Please, for any amazing man that you add to the list, consider adding an equally amazing woman to it, and vice versa.
right = [...]string{
// Muhammad ibn Jābir al-Ḥarrānī al-Battānī was a founding father of astronomy. https://en.wikipedia.org/wiki/Mu%E1%B8%A5ammad_ibn_J%C4%81bir_al-%E1%B8%A4arr%C4%81n%C4%AB_al-Batt%C4%81n%C4%AB
"albattani",
// Frances E. Allen, became the first female IBM Fellow in 1989. In 2006, she became the first female recipient of the ACM's Turing Award. https://en.wikipedia.org/wiki/Frances_E._Allen
"allen",
// June Almeida - Scottish virologist who took the first pictures of the rubella virus - https://en.wikipedia.org/wiki/June_Almeida
"almeida",
// Kathleen Antonelli, American computer programmer and one of the six original programmers of the ENIAC - https://en.wikipedia.org/wiki/Kathleen_Antonelli
"antonelli",
// Maria Gaetana Agnesi - Italian mathematician, philosopher, theologian and humanitarian. She was the first woman to write a mathematics handbook and the first woman appointed as a Mathematics Professor at a University. https://en.wikipedia.org/wiki/Maria_Gaetana_Agnesi
"agnesi",
// Archimedes was a physicist, engineer and mathematician who invented too many things to list them here. https://en.wikipedia.org/wiki/Archimedes
"archimedes",
// Maria Ardinghelli - Italian translator, mathematician and physicist - https://en.wikipedia.org/wiki/Maria_Ardinghelli
"ardinghelli",
// Aryabhata - Ancient Indian mathematician-astronomer during 476-550 CE https://en.wikipedia.org/wiki/Aryabhata
"aryabhata",
// Wanda Austin - Wanda Austin is the President and CEO of The Aerospace Corporation, a leading architect for the US security space programs. https://en.wikipedia.org/wiki/Wanda_Austin
"austin",
// Charles Babbage invented the concept of a programmable computer. https://en.wikipedia.org/wiki/Charles_Babbage.
"babbage",
// Stefan Banach - Polish mathematician, was one of the founders of modern functional analysis. https://en.wikipedia.org/wiki/Stefan_Banach
"banach",
// Buckaroo Banzai and his mentor Dr. Hikita perfected the "oscillation overthruster", a device that allows one to pass through solid matter. - https://en.wikipedia.org/wiki/The_Adventures_of_Buckaroo_Banzai_Across_the_8th_Dimension
"banzai",
// John Bardeen co-invented the transistor - https://en.wikipedia.org/wiki/John_Bardeen
"bardeen",
// Jean Bartik, born Betty Jean Jennings, was one of the original programmers for the ENIAC computer. https://en.wikipedia.org/wiki/Jean_Bartik
"bartik",
// Laura Bassi, the world's first female professor https://en.wikipedia.org/wiki/Laura_Bassi
"bassi",
// Hugh Beaver, British engineer, founder of the Guinness Book of World Records https://en.wikipedia.org/wiki/Hugh_Beaver
"beaver",
// Alexander Graham Bell - an eminent Scottish-born scientist, inventor, engineer and innovator who is credited with inventing the first practical telephone - https://en.wikipedia.org/wiki/Alexander_Graham_Bell
"bell",
// Karl Friedrich Benz - a German automobile engineer. Inventor of the first practical motorcar. https://en.wikipedia.org/wiki/Karl_Benz
"benz",
// Homi J Bhabha - was an Indian nuclear physicist, founding director, and professor of physics at the Tata Institute of Fundamental Research. Colloquially known as "father of Indian nuclear programme"- https://en.wikipedia.org/wiki/Homi_J._Bhabha
"bhabha",
// Bhaskara II - Ancient Indian mathematician-astronomer whose work on calculus predates Newton and Leibniz by over half a millennium - https://en.wikipedia.org/wiki/Bh%C4%81skara_II#Calculus
"bhaskara",
// Sue Black - British computer scientist and campaigner. She has been instrumental in saving Bletchley Park, the site of World War II codebreaking - https://en.wikipedia.org/wiki/Sue_Black_(computer_scientist)
"black",
// Elizabeth Helen Blackburn - Australian-American Nobel laureate; best known for co-discovering telomerase. https://en.wikipedia.org/wiki/Elizabeth_Blackburn
"blackburn",
// Elizabeth Blackwell - American doctor and first American woman to receive a medical degree - https://en.wikipedia.org/wiki/Elizabeth_Blackwell
"blackwell",
// Niels Bohr is the father of quantum theory. https://en.wikipedia.org/wiki/Niels_Bohr.
"bohr",
// Kathleen Booth, she's credited with writing the first assembly language. https://en.wikipedia.org/wiki/Kathleen_Booth
"booth",
// Anita Borg - Anita Borg was the founding director of the Institute for Women and Technology (IWT). https://en.wikipedia.org/wiki/Anita_Borg
"borg",
// Satyendra Nath Bose - He provided the foundation for Bose–Einstein statistics and the theory of the Bose–Einstein condensate. - https://en.wikipedia.org/wiki/Satyendra_Nath_Bose
"bose",
// Katherine Louise Bouman is an imaging scientist and Assistant Professor of Computer Science at the California Institute of Technology. She researches computational methods for imaging, and developed an algorithm that made possible the first visualization of a black hole using the Event Horizon Telescope. - https://en.wikipedia.org/wiki/Katie_Bouman
"bouman",
// Evelyn Boyd Granville - She was one of the first African-American women to receive a Ph.D. in mathematics; she earned it in 1949 from Yale University. https://en.wikipedia.org/wiki/Evelyn_Boyd_Granville
"boyd",
// Brahmagupta - Ancient Indian mathematician during 598-670 CE who gave rules to compute with zero - https://en.wikipedia.org/wiki/Brahmagupta#Zero
"brahmagupta",
// Walter Houser Brattain co-invented the transistor - https://en.wikipedia.org/wiki/Walter_Houser_Brattain
"brattain",
// Emmett Brown invented time travel. https://en.wikipedia.org/wiki/Emmett_Brown (thanks Brian Goff)
"brown",
// Linda Brown Buck - American biologist and Nobel laureate best known for her genetic and molecular analyses of the mechanisms of smell. https://en.wikipedia.org/wiki/Linda_B._Buck
"buck",
// Dame Susan Jocelyn Bell Burnell - Northern Irish astrophysicist who discovered radio pulsars and was the first to analyse them. https://en.wikipedia.org/wiki/Jocelyn_Bell_Burnell
"burnell",
// Annie Jump Cannon - pioneering female astronomer who classified hundreds of thousands of stars and created the system we use to understand stars today. https://en.wikipedia.org/wiki/Annie_Jump_Cannon
"cannon",
// Rachel Carson - American marine biologist and conservationist, her book Silent Spring and other writings are credited with advancing the global environmental movement. https://en.wikipedia.org/wiki/Rachel_Carson
"carson",
// Dame Mary Lucy Cartwright - British mathematician who was one of the first to study what is now known as chaos theory. Also known for Cartwright's theorem which finds applications in signal processing. https://en.wikipedia.org/wiki/Mary_Cartwright
"cartwright",
// George Washington Carver - American agricultural scientist and inventor. He was the most prominent black scientist of the early 20th century. https://en.wikipedia.org/wiki/George_Washington_Carver
"carver",
// Vinton Gray Cerf - American Internet pioneer, recognised as one of "the fathers of the Internet". With Robert Elliot Kahn, he designed TCP and IP, the primary data communication protocols of the Internet and other computer networks. https://en.wikipedia.org/wiki/Vint_Cerf
"cerf",
// Subrahmanyan Chandrasekhar - Astrophysicist known for his mathematical theory on different stages and evolution in structures of the stars. He won the Nobel Prize for Physics - https://en.wikipedia.org/wiki/Subrahmanyan_Chandrasekhar
"chandrasekhar",
// Sergey Alexeyevich Chaplygin (Russian: Серге́й Алексе́евич Чаплы́гин; April 5, 1869 – October 8, 1942) was a Russian and Soviet physicist, mathematician, and mechanical engineer. He is known for mathematical formulas such as Chaplygin's equation and for a hypothetical substance in cosmology called Chaplygin gas, named after him. https://en.wikipedia.org/wiki/Sergey_Chaplygin
"chaplygin",
// Émilie du Châtelet - French natural philosopher, mathematician, physicist, and author during the early 1730s, known for her translation of and commentary on Isaac Newton's book Principia containing basic laws of physics. https://en.wikipedia.org/wiki/%C3%89milie_du_Ch%C3%A2telet
"chatelet",
// Asima Chatterjee was an Indian organic chemist noted for her research on vinca alkaloids, development of drugs for treatment of epilepsy and malaria - https://en.wikipedia.org/wiki/Asima_Chatterjee
"chatterjee",
// Pafnuty Chebyshev - Russian mathematician. He is known for his works on probability, statistics, mechanics, analytical geometry and number theory https://en.wikipedia.org/wiki/Pafnuty_Chebyshev
"chebyshev",
// Bram Cohen - American computer programmer and author of the BitTorrent peer-to-peer protocol. https://en.wikipedia.org/wiki/Bram_Cohen
"cohen",
// David Lee Chaum - American computer scientist and cryptographer. Known for his seminal contributions in the field of anonymous communication. https://en.wikipedia.org/wiki/David_Chaum
"chaum",
// Joan Clarke - Bletchley Park code breaker during the Second World War who pioneered techniques that remained top secret for decades. Also an accomplished numismatist https://en.wikipedia.org/wiki/Joan_Clarke
"clarke",
// Jane Colden - American botanist widely considered the first female American botanist - https://en.wikipedia.org/wiki/Jane_Colden
"colden",
// Gerty Theresa Cori - American biochemist who became the third woman—and first American woman—to win a Nobel Prize in science, and the first woman to be awarded the Nobel Prize in Physiology or Medicine. Cori was born in Prague. https://en.wikipedia.org/wiki/Gerty_Cori
"cori",
// Seymour Roger Cray was an American electrical engineer and supercomputer architect who designed a series of computers that were the fastest in the world for decades. https://en.wikipedia.org/wiki/Seymour_Cray
"cray",
// This entry reflects a husband and wife team who worked together:
// Joan Curran was a Welsh scientist who developed radar and invented chaff, a radar countermeasure. https://en.wikipedia.org/wiki/Joan_Curran
// Samuel Curran was an Irish physicist who worked alongside his wife during WWII and invented the proximity fuse. https://en.wikipedia.org/wiki/Samuel_Curran
"curran",
// Marie Curie discovered radioactivity. https://en.wikipedia.org/wiki/Marie_Curie.
"curie",
// Charles Darwin established the principles of natural evolution. https://en.wikipedia.org/wiki/Charles_Darwin.
"darwin",
// Leonardo Da Vinci invented too many things to list here. https://en.wikipedia.org/wiki/Leonardo_da_Vinci.
"davinci",
// A. K. (Alexander Keewatin) Dewdney, Canadian mathematician, computer scientist, author and filmmaker. Contributor to Scientific American's "Computer Recreations" from 1984 to 1991. Author of Core War (program), The Planiverse, The Armchair Universe, The Magic Machine, The New Turing Omnibus, and more. https://en.wikipedia.org/wiki/Alexander_Dewdney
"dewdney",
// Satish Dhawan - Indian mathematician and aerospace engineer, known for leading the successful and indigenous development of the Indian space programme. https://en.wikipedia.org/wiki/Satish_Dhawan
"dhawan",
// Bailey Whitfield Diffie - American cryptographer and one of the pioneers of public-key cryptography. https://en.wikipedia.org/wiki/Whitfield_Diffie
"diffie",
// Edsger Wybe Dijkstra was a Dutch computer scientist and mathematical scientist. https://en.wikipedia.org/wiki/Edsger_W._Dijkstra.
"dijkstra",
// Paul Adrien Maurice Dirac - English theoretical physicist who made fundamental contributions to the early development of both quantum mechanics and quantum electrodynamics. https://en.wikipedia.org/wiki/Paul_Dirac
"dirac",
// Agnes Meyer Driscoll - American cryptanalyst during World Wars I and II who successfully cryptanalysed a number of Japanese ciphers. She was also the co-developer of one of the cipher machines of the US Navy, the CM. https://en.wikipedia.org/wiki/Agnes_Meyer_Driscoll
"driscoll",
// Donna Dubinsky - played an integral role in the development of personal digital assistants (PDAs) serving as CEO of Palm, Inc. and co-founding Handspring. https://en.wikipedia.org/wiki/Donna_Dubinsky
"dubinsky",
// Annie Easley - She was a leading member of the team which developed software for the Centaur rocket stage and one of the first African-Americans in her field. https://en.wikipedia.org/wiki/Annie_Easley
"easley",
// Thomas Alva Edison, prolific inventor https://en.wikipedia.org/wiki/Thomas_Edison
"edison",
// Albert Einstein invented the general theory of relativity. https://en.wikipedia.org/wiki/Albert_Einstein
"einstein",
// Alexandra Asanovna Elbakyan (Russian: Алекса́ндра Аса́новна Элбакя́н) is a Kazakhstani graduate student, computer programmer, internet pirate in hiding, and the creator of the site Sci-Hub. Nature has listed her in 2016 in the top ten people that mattered in science, and Ars Technica has compared her to Aaron Swartz. - https://en.wikipedia.org/wiki/Alexandra_Elbakyan
"elbakyan",
// Taher A. ElGamal - Egyptian cryptographer best known for the ElGamal discrete log cryptosystem and the ElGamal digital signature scheme. https://en.wikipedia.org/wiki/Taher_Elgamal
"elgamal",
// Gertrude Elion - American biochemist, pharmacologist and the 1988 recipient of the Nobel Prize in Medicine - https://en.wikipedia.org/wiki/Gertrude_Elion
"elion",
// James Henry Ellis - British engineer and cryptographer employed by the GCHQ. Best known for conceiving for the first time, the idea of public-key cryptography. https://en.wikipedia.org/wiki/James_H._Ellis
"ellis",
// Douglas Engelbart gave the mother of all demos: https://en.wikipedia.org/wiki/Douglas_Engelbart
"engelbart",
// Euclid invented geometry. https://en.wikipedia.org/wiki/Euclid
"euclid",
// Leonhard Euler invented large parts of modern mathematics. https://de.wikipedia.org/wiki/Leonhard_Euler
"euler",
// Michael Faraday - British scientist who contributed to the study of electromagnetism and electrochemistry. https://en.wikipedia.org/wiki/Michael_Faraday
"faraday",
// Horst Feistel - German-born American cryptographer who was one of the earliest non-government researchers to study the design and theory of block ciphers. Co-developer of DES and Lucifer. Feistel networks, a symmetric structure used in the construction of block ciphers are named after him. https://en.wikipedia.org/wiki/Horst_Feistel
"feistel",
// Pierre de Fermat pioneered several aspects of modern mathematics. https://en.wikipedia.org/wiki/Pierre_de_Fermat
"fermat",
// Enrico Fermi invented the first nuclear reactor. https://en.wikipedia.org/wiki/Enrico_Fermi.
"fermi",
// Richard Feynman was a key contributor to quantum mechanics and particle physics. https://en.wikipedia.org/wiki/Richard_Feynman
"feynman",
// Benjamin Franklin is famous for his experiments in electricity and the invention of the lightning rod.
"franklin",
// Yuri Alekseyevich Gagarin - Soviet pilot and cosmonaut, best known as the first human to journey into outer space. https://en.wikipedia.org/wiki/Yuri_Gagarin
"gagarin",
// Galileo was a founding father of modern astronomy, and faced politics and obscurantism to establish scientific truth. https://en.wikipedia.org/wiki/Galileo_Galilei
"galileo",
// Évariste Galois - French mathematician whose work laid the foundations of Galois theory and group theory, two major branches of abstract algebra, and the subfield of Galois connections, all while still in his late teens. https://en.wikipedia.org/wiki/%C3%89variste_Galois
"galois",
// Kadambini Ganguly - Indian physician, known for being the first South Asian female physician, trained in western medicine, to graduate in South Asia. https://en.wikipedia.org/wiki/Kadambini_Ganguly
"ganguly",
// William Henry "Bill" Gates III is an American business magnate, philanthropist, investor, computer programmer, and inventor. https://en.wikipedia.org/wiki/Bill_Gates
"gates",
// Johann Carl Friedrich Gauss - German mathematician who made significant contributions to many fields, including number theory, algebra, statistics, analysis, differential geometry, geodesy, geophysics, mechanics, electrostatics, magnetic fields, astronomy, matrix theory, and optics. https://en.wikipedia.org/wiki/Carl_Friedrich_Gauss
"gauss",
// Marie-Sophie Germain - French mathematician, physicist and philosopher. Known for her work on elasticity theory, number theory and philosophy. https://en.wikipedia.org/wiki/Sophie_Germain
"germain",
// Adele Goldberg, was one of the designers and developers of the Smalltalk language. https://en.wikipedia.org/wiki/Adele_Goldberg_(computer_scientist)
"goldberg",
// Adele Goldstine, born Adele Katz, wrote the complete technical description for the first electronic digital computer, ENIAC. https://en.wikipedia.org/wiki/Adele_Goldstine
"goldstine",
// Shafi Goldwasser is a computer scientist known for creating theoretical foundations of modern cryptography. Winner of 2012 ACM Turing Award. https://en.wikipedia.org/wiki/Shafi_Goldwasser
"goldwasser",
// James Golick, all around gangster.
"golick",
// Jane Goodall - British primatologist, ethologist, and anthropologist who is considered to be the world's foremost expert on chimpanzees - https://en.wikipedia.org/wiki/Jane_Goodall
"goodall",
// Stephen Jay Gould was an American paleontologist, evolutionary biologist, and historian of science. He is most famous for the theory of punctuated equilibrium - https://en.wikipedia.org/wiki/Stephen_Jay_Gould
"gould",
// Carolyn Widney Greider - American molecular biologist and joint winner of the 2009 Nobel Prize for Physiology or Medicine for the discovery of telomerase. https://en.wikipedia.org/wiki/Carol_W._Greider
"greider",
// Alexander Grothendieck - German-born French mathematician who became a leading figure in the creation of modern algebraic geometry. https://en.wikipedia.org/wiki/Alexander_Grothendieck
"grothendieck",
// Lois Haibt - American computer scientist, part of the team at IBM that developed FORTRAN - https://en.wikipedia.org/wiki/Lois_Haibt
"haibt",
// Margaret Hamilton - Director of the Software Engineering Division of the MIT Instrumentation Laboratory, which developed on-board flight software for the Apollo space program. https://en.wikipedia.org/wiki/Margaret_Hamilton_(scientist)
"hamilton",
// Caroline Harriet Haslett - English electrical engineer, electricity industry administrator and champion of women's rights. Co-author of British Standard 1363 that specifies AC power plugs and sockets used across the United Kingdom (which is widely considered as one of the safest designs). https://en.wikipedia.org/wiki/Caroline_Haslett
"haslett",
// Stephen Hawking pioneered the field of cosmology by combining general relativity and quantum mechanics. https://en.wikipedia.org/wiki/Stephen_Hawking
"hawking",
// Martin Edward Hellman - American cryptologist, best known for his invention of public-key cryptography in co-operation with Whitfield Diffie and Ralph Merkle. https://en.wikipedia.org/wiki/Martin_Hellman
"hellman",
// Werner Heisenberg was a founding father of quantum mechanics. https://en.wikipedia.org/wiki/Werner_Heisenberg
"heisenberg",
// Grete Hermann was a German philosopher noted for her philosophical work on the foundations of quantum mechanics. https://en.wikipedia.org/wiki/Grete_Hermann
"hermann",
// Caroline Lucretia Herschel - German astronomer and discoverer of several comets. https://en.wikipedia.org/wiki/Caroline_Herschel
"herschel",
// Heinrich Rudolf Hertz - German physicist who first conclusively proved the existence of the electromagnetic waves. https://en.wikipedia.org/wiki/Heinrich_Hertz
"hertz",
// Jaroslav Heyrovský was the inventor of the polarographic method, father of the electroanalytical method, and recipient of the Nobel Prize in 1959. His main field of work was polarography. https://en.wikipedia.org/wiki/Jaroslav_Heyrovsk%C3%BD
"heyrovsky",
// Dorothy Hodgkin was a British biochemist, credited with the development of protein crystallography. She was awarded the Nobel Prize in Chemistry in 1964. https://en.wikipedia.org/wiki/Dorothy_Hodgkin
"hodgkin",
// Douglas R. Hofstadter is an American professor of cognitive science and author of the Pulitzer Prize and American Book Award-winning work Goedel, Escher, Bach: An Eternal Golden Braid in 1979. A mind-bending work which coined Hofstadter's Law: "It always takes longer than you expect, even when you take into account Hofstadter's Law." https://en.wikipedia.org/wiki/Douglas_Hofstadter
"hofstadter",
// Erna Schneider Hoover revolutionized modern communication by inventing a computerized telephone switching method. https://en.wikipedia.org/wiki/Erna_Schneider_Hoover
"hoover",
// Grace Hopper developed the first compiler for a computer programming language and is credited with popularizing the term "debugging" for fixing computer glitches. https://en.wikipedia.org/wiki/Grace_Hopper
"hopper",
// Frances Hugle, she was an American scientist, engineer, and inventor who contributed to the understanding of semiconductors, integrated circuitry, and the unique electrical principles of microscopic materials. https://en.wikipedia.org/wiki/Frances_Hugle
"hugle",
// Hypatia - Greek Alexandrine Neoplatonist philosopher in Egypt who was one of the earliest mothers of mathematics - https://en.wikipedia.org/wiki/Hypatia
"hypatia",
// Teruko Ishizaka - Japanese scientist and immunologist who co-discovered the antibody class Immunoglobulin E. https://en.wikipedia.org/wiki/Teruko_Ishizaka
"ishizaka",
// Mary Jackson, American mathematician and aerospace engineer who earned the highest title within NASA's engineering department - https://en.wikipedia.org/wiki/Mary_Jackson_(engineer)
"jackson",
// Yeong-Sil Jang was a Korean scientist and astronomer during the Joseon Dynasty; he invented the first metal printing press and water gauge. https://en.wikipedia.org/wiki/Jang_Yeong-sil
"jang",
// Mae Carol Jemison - is an American engineer, physician, and former NASA astronaut. She became the first black woman to travel in space when she served as a mission specialist aboard the Space Shuttle Endeavour - https://en.wikipedia.org/wiki/Mae_Jemison
"jemison",
// Betty Jennings - one of the original programmers of the ENIAC. https://en.wikipedia.org/wiki/ENIAC - https://en.wikipedia.org/wiki/Jean_Bartik
"jennings",
// Mary Lou Jepsen, was the founder and chief technology officer of One Laptop Per Child (OLPC), and the founder of Pixel Qi. https://en.wikipedia.org/wiki/Mary_Lou_Jepsen
"jepsen",
// Katherine Coleman Goble Johnson - American physicist and mathematician who contributed to NASA. https://en.wikipedia.org/wiki/Katherine_Johnson
"johnson",
// Irène Joliot-Curie - French scientist who was awarded the Nobel Prize for Chemistry in 1935. Daughter of Marie and Pierre Curie. https://en.wikipedia.org/wiki/Ir%C3%A8ne_Joliot-Curie
"joliot",
// Karen Spärck Jones came up with the concept of inverse document frequency, which is used in most search engines today. https://en.wikipedia.org/wiki/Karen_Sp%C3%A4rck_Jones
"jones",
// A. P. J. Abdul Kalam - is an Indian scientist aka Missile Man of India for his work on the development of ballistic missile and launch vehicle technology - https://en.wikipedia.org/wiki/A._P._J._Abdul_Kalam
"kalam",
// Sergey Petrovich Kapitsa (Russian: Серге́й Петро́вич Капи́ца; 14 February 1928 – 14 August 2012) was a Russian physicist and demographer. He was best known as host of the popular and long-running Russian scientific TV show, Evident, but Incredible. His father was the Nobel laureate Soviet-era physicist Pyotr Kapitsa, and his brother was the geographer and Antarctic explorer Andrey Kapitsa. - https://en.wikipedia.org/wiki/Sergey_Kapitsa
"kapitsa",
// Susan Kare, created the icons and many of the interface elements for the original Apple Macintosh in the 1980s, and was an original employee of NeXT, working as the Creative Director. https://en.wikipedia.org/wiki/Susan_Kare
"kare",
// Mstislav Keldysh - a Soviet scientist in the field of mathematics and mechanics, academician of the USSR Academy of Sciences (1946), President of the USSR Academy of Sciences (1961–1975), three times Hero of Socialist Labor (1956, 1961, 1971), fellow of the Royal Society of Edinburgh (1968). https://en.wikipedia.org/wiki/Mstislav_Keldysh
"keldysh",
// Mary Kenneth Keller, Sister Mary Kenneth Keller became the first American woman to earn a PhD in Computer Science in 1965. https://en.wikipedia.org/wiki/Mary_Kenneth_Keller
"keller",
// Johannes Kepler, German astronomer known for his three laws of planetary motion - https://en.wikipedia.org/wiki/Johannes_Kepler
"kepler",
// Omar Khayyam - Persian mathematician, astronomer and poet. Known for his work on the classification and solution of cubic equations, for his contribution to the understanding of Euclid's fifth postulate and for computing the length of a year very accurately. https://en.wikipedia.org/wiki/Omar_Khayyam
"khayyam",
// Har Gobind Khorana - Indian-American biochemist who shared the 1968 Nobel Prize for Physiology - https://en.wikipedia.org/wiki/Har_Gobind_Khorana
"khorana",
// Jack Kilby invented silicon integrated circuits and gave Silicon Valley its name. - https://en.wikipedia.org/wiki/Jack_Kilby
"kilby",
// Maria Kirch - German astronomer and first woman to discover a comet - https://en.wikipedia.org/wiki/Maria_Margarethe_Kirch
"kirch",
// Donald Knuth - American computer scientist, author of "The Art of Computer Programming" and creator of the TeX typesetting system. https://en.wikipedia.org/wiki/Donald_Knuth
"knuth",
// Sophie Kowalevski - Russian mathematician responsible for important original contributions to analysis, differential equations and mechanics - https://en.wikipedia.org/wiki/Sofia_Kovalevskaya
"kowalevski",
// Marie-Jeanne de Lalande - French astronomer, mathematician and cataloguer of stars - https://en.wikipedia.org/wiki/Marie-Jeanne_de_Lalande
"lalande",
// Hedy Lamarr - Actress and inventor. The principles of her work are now incorporated into modern Wi-Fi, CDMA and Bluetooth technology. https://en.wikipedia.org/wiki/Hedy_Lamarr
"lamarr",
// Leslie B. Lamport - American computer scientist. Lamport is best known for his seminal work in distributed systems and was the winner of the 2013 Turing Award. https://en.wikipedia.org/wiki/Leslie_Lamport
"lamport",
// Mary Leakey - British paleoanthropologist who discovered the first fossilized Proconsul skull - https://en.wikipedia.org/wiki/Mary_Leakey
"leakey",
// Henrietta Swan Leavitt - she was an American astronomer who discovered the relation between the luminosity and the period of Cepheid variable stars. https://en.wikipedia.org/wiki/Henrietta_Swan_Leavitt
"leavitt",
// Esther Miriam Zimmer Lederberg - American microbiologist and a pioneer of bacterial genetics. https://en.wikipedia.org/wiki/Esther_Lederberg
"lederberg",
// Inge Lehmann - Danish seismologist and geophysicist. Known for discovering in 1936 that the Earth has a solid inner core inside a molten outer core. https://en.wikipedia.org/wiki/Inge_Lehmann
"lehmann",
// Daniel Lewin - Mathematician, Akamai co-founder, soldier, 9/11 victim-- Developed optimization techniques for routing traffic on the internet. Died attempting to stop the 9-11 hijackers. https://en.wikipedia.org/wiki/Daniel_Lewin
"lewin",
// Ruth Lichterman - one of the original programmers of the ENIAC. https://en.wikipedia.org/wiki/ENIAC - https://en.wikipedia.org/wiki/Ruth_Teitelbaum
"lichterman",
// Barbara Liskov - co-developed the Liskov substitution principle. Liskov was also the winner of the Turing Prize in 2008. - https://en.wikipedia.org/wiki/Barbara_Liskov
"liskov",
// Ada Lovelace invented the first algorithm. https://en.wikipedia.org/wiki/Ada_Lovelace (thanks James Turnbull)
"lovelace",
// Auguste and Louis Lumière - the first filmmakers in history - https://en.wikipedia.org/wiki/Auguste_and_Louis_Lumi%C3%A8re
"lumiere",
// Mahavira - Ancient Indian mathematician during 9th century AD who discovered basic algebraic identities - https://en.wikipedia.org/wiki/Mah%C4%81v%C4%ABra_(mathematician)
"mahavira",
// Lynn Margulis (b. Lynn Petra Alexander) - an American evolutionary theorist and biologist, science author, educator, and popularizer, and was the primary modern proponent for the significance of symbiosis in evolution. - https://en.wikipedia.org/wiki/Lynn_Margulis
"margulis",
// Yukihiro Matsumoto - Japanese computer scientist and software programmer best known as the chief designer of the Ruby programming language. https://en.wikipedia.org/wiki/Yukihiro_Matsumoto
"matsumoto",
// James Clerk Maxwell - Scottish physicist, best known for his formulation of electromagnetic theory. https://en.wikipedia.org/wiki/James_Clerk_Maxwell
"maxwell",
// Maria Mayer - American theoretical physicist and Nobel laureate in Physics for proposing the nuclear shell model of the atomic nucleus - https://en.wikipedia.org/wiki/Maria_Mayer
"mayer",
// John McCarthy invented LISP: https://en.wikipedia.org/wiki/John_McCarthy_(computer_scientist)
"mccarthy",
// Barbara McClintock - a distinguished American cytogeneticist, 1983 Nobel Laureate in Physiology or Medicine for discovering transposons. https://en.wikipedia.org/wiki/Barbara_McClintock
"mcclintock",
// Anne Laura Dorinthea McLaren - British developmental biologist whose work helped lead to human in-vitro fertilisation. https://en.wikipedia.org/wiki/Anne_McLaren
"mclaren",
// Malcolm McLean invented the modern shipping container: https://en.wikipedia.org/wiki/Malcom_McLean
"mclean",
// Kay McNulty - one of the original programmers of the ENIAC. https://en.wikipedia.org/wiki/ENIAC - https://en.wikipedia.org/wiki/Kathleen_Antonelli
"mcnulty",
// Gregor Johann Mendel - Czech scientist and founder of genetics. https://en.wikipedia.org/wiki/Gregor_Mendel
"mendel",
// Dmitri Mendeleev - a chemist and inventor. He formulated the Periodic Law, created a farsighted version of the periodic table of elements, and used it to correct the properties of some already discovered elements and also to predict the properties of eight elements yet to be discovered. https://en.wikipedia.org/wiki/Dmitri_Mendeleev
"mendeleev",
// Lise Meitner - Austrian/Swedish physicist who was involved in the discovery of nuclear fission. The element meitnerium is named after her - https://en.wikipedia.org/wiki/Lise_Meitner
"meitner",
// Carla Meninsky, was the game designer and programmer for Atari 2600 games Dodge 'Em and Warlords. https://en.wikipedia.org/wiki/Carla_Meninsky
"meninsky",
// Ralph C. Merkle - American computer scientist, known for devising Merkle's puzzles - one of the very first schemes for public-key cryptography. Also, inventor of Merkle trees and co-inventor of the Merkle-Damgård construction for building collision-resistant cryptographic hash functions and the Merkle-Hellman knapsack cryptosystem. https://en.wikipedia.org/wiki/Ralph_Merkle
"merkle",
// Johanna Mestorf - German prehistoric archaeologist and first female museum director in Germany - https://en.wikipedia.org/wiki/Johanna_Mestorf
"mestorf",
// Maryam Mirzakhani - an Iranian mathematician and the first woman to win the Fields Medal. https://en.wikipedia.org/wiki/Maryam_Mirzakhani
"mirzakhani",
// Gordon Earle Moore - American engineer, Silicon Valley founding father, author of Moore's law. https://en.wikipedia.org/wiki/Gordon_Moore
"moore",
// Samuel Morse - contributed to the invention of a single-wire telegraph system based on European telegraphs and was a co-developer of the Morse code - https://en.wikipedia.org/wiki/Samuel_Morse
"morse",
// Ian Murdock - founder of the Debian project - https://en.wikipedia.org/wiki/Ian_Murdock
"murdock",
// May-Britt Moser - Nobel prize winner neuroscientist who contributed to the discovery of grid cells in the brain. https://en.wikipedia.org/wiki/May-Britt_Moser
"moser",
// John Napier of Merchiston - Scottish landowner known as an astronomer, mathematician and physicist. Best known for his discovery of logarithms. https://en.wikipedia.org/wiki/John_Napier
"napier",
// John Forbes Nash, Jr. - American mathematician who made fundamental contributions to game theory, differential geometry, and the study of partial differential equations. https://en.wikipedia.org/wiki/John_Forbes_Nash_Jr.
"nash",
// John von Neumann - today's computer architectures are based on the von Neumann architecture. https://en.wikipedia.org/wiki/Von_Neumann_architecture
"neumann",
// Isaac Newton invented classic mechanics and modern optics. https://en.wikipedia.org/wiki/Isaac_Newton
"newton",
// Florence Nightingale, more prominently known as a nurse, was also the first female member of the Royal Statistical Society and a pioneer in statistical graphics https://en.wikipedia.org/wiki/Florence_Nightingale#Statistics_and_sanitary_reform
"nightingale",
// Alfred Nobel - a Swedish chemist, engineer, innovator, and armaments manufacturer (inventor of dynamite) - https://en.wikipedia.org/wiki/Alfred_Nobel
"nobel",
// Emmy Noether, German mathematician. Noether's Theorem is named after her. https://en.wikipedia.org/wiki/Emmy_Noether
"noether",
// Poppy Northcutt. Poppy Northcutt was the first woman to work as part of NASA's Mission Control. http://www.businessinsider.com/poppy-northcutt-helped-apollo-astronauts-2014-12?op=1
"northcutt",
// Robert Noyce invented silicon integrated circuits and gave Silicon Valley its name. - https://en.wikipedia.org/wiki/Robert_Noyce
"noyce",
// Panini - Ancient Indian linguist and grammarian from 4th century BCE who worked on the world's first formal system - https://en.wikipedia.org/wiki/P%C4%81%E1%B9%87ini#Comparison_with_modern_formal_systems
"panini",
// Ambroise Pare invented modern surgery. https://en.wikipedia.org/wiki/Ambroise_Par%C3%A9
"pare",
// Blaise Pascal, French mathematician, physicist, and inventor - https://en.wikipedia.org/wiki/Blaise_Pascal
"pascal",
// Louis Pasteur discovered vaccination, fermentation and pasteurization. https://en.wikipedia.org/wiki/Louis_Pasteur.
"pasteur",
// Cecilia Payne-Gaposchkin was an astronomer and astrophysicist who, in 1925, proposed in her Ph.D. thesis an explanation for the composition of stars in terms of the relative abundances of hydrogen and helium. https://en.wikipedia.org/wiki/Cecilia_Payne-Gaposchkin
"payne",
// Radia Perlman is a software designer and network engineer and most famous for her invention of the spanning-tree protocol (STP). https://en.wikipedia.org/wiki/Radia_Perlman
"perlman",
// Rob Pike was a key contributor to Unix, Plan 9, the X graphic system, utf-8, and the Go programming language. https://en.wikipedia.org/wiki/Rob_Pike
"pike",
// Henri Poincaré made fundamental contributions in several fields of mathematics. https://en.wikipedia.org/wiki/Henri_Poincar%C3%A9
"poincare",
// Laura Poitras is a director and producer whose work, made possible by open source crypto tools, advances the causes of truth and freedom of information by reporting disclosures by whistleblowers such as Edward Snowden. https://en.wikipedia.org/wiki/Laura_Poitras
"poitras",
// Tatyana Avenirovna Proskuriakova (Russian: Татья́на Авени́ровна Проскуряко́ва) (January 23 [O.S. January 10] 1909 August 30, 1985) was a Russian-American Mayanist scholar and archaeologist who contributed significantly to the deciphering of Maya hieroglyphs, the writing system of the pre-Columbian Maya civilization of Mesoamerica. https://en.wikipedia.org/wiki/Tatiana_Proskouriakoff
"proskuriakova",
// Claudius Ptolemy - a Greco-Egyptian writer of Alexandria, known as a mathematician, astronomer, geographer, astrologer, and poet of a single epigram in the Greek Anthology - https://en.wikipedia.org/wiki/Ptolemy
"ptolemy",
// C. V. Raman - Indian physicist who won the Nobel Prize in 1930 for proposing the Raman effect. - https://en.wikipedia.org/wiki/C._V._Raman
"raman",
// Srinivasa Ramanujan - Indian mathematician and autodidact who made extraordinary contributions to mathematical analysis, number theory, infinite series, and continued fractions. - https://en.wikipedia.org/wiki/Srinivasa_Ramanujan
"ramanujan",
// Sally Kristen Ride was an American physicist and astronaut. She was the first American woman in space, and the youngest American astronaut. https://en.wikipedia.org/wiki/Sally_Ride
"ride",
// Rita Levi-Montalcini - Won Nobel Prize in Physiology or Medicine jointly with colleague Stanley Cohen for the discovery of nerve growth factor (https://en.wikipedia.org/wiki/Rita_Levi-Montalcini)
"montalcini",
// Dennis Ritchie - co-creator of UNIX and the C programming language. - https://en.wikipedia.org/wiki/Dennis_Ritchie
"ritchie",
// Ida Rhodes - American pioneer in computer programming, designed the first computer used for Social Security. https://en.wikipedia.org/wiki/Ida_Rhodes
"rhodes",
// Julia Hall Bowman Robinson - American mathematician renowned for her contributions to the fields of computability theory and computational complexity theory. https://en.wikipedia.org/wiki/Julia_Robinson
"robinson",
// Wilhelm Conrad Röntgen - German physicist who was awarded the first Nobel Prize in Physics in 1901 for the discovery of X-rays (Röntgen rays). https://en.wikipedia.org/wiki/Wilhelm_R%C3%B6ntgen
"roentgen",
// Rosalind Franklin - British biophysicist and X-ray crystallographer whose research was critical to the understanding of DNA - https://en.wikipedia.org/wiki/Rosalind_Franklin
"rosalind",
// Vera Rubin - American astronomer who pioneered work on galaxy rotation rates. https://en.wikipedia.org/wiki/Vera_Rubin
"rubin",
// Meghnad Saha - Indian astrophysicist best known for his development of the Saha equation, used to describe chemical and physical conditions in stars - https://en.wikipedia.org/wiki/Meghnad_Saha
"saha",
// Jean E. Sammet developed FORMAC, the first widely used computer language for symbolic manipulation of mathematical formulas. https://en.wikipedia.org/wiki/Jean_E._Sammet
"sammet",
// Mildred Sanderson - American mathematician best known for Sanderson's theorem concerning modular invariants. https://en.wikipedia.org/wiki/Mildred_Sanderson
"sanderson",
// Satoshi Nakamoto is the name used by the unknown person or group of people who developed bitcoin, authored the bitcoin white paper, and created and deployed bitcoin's original reference implementation. https://en.wikipedia.org/wiki/Satoshi_Nakamoto
"satoshi",
// Adi Shamir - Israeli cryptographer whose numerous inventions and contributions to cryptography include the Ferge Fiat Shamir identification scheme, the Rivest Shamir Adleman (RSA) public-key cryptosystem, the Shamir's secret sharing scheme, the breaking of the Merkle-Hellman cryptosystem, the TWINKLE and TWIRL factoring devices and the discovery of differential cryptanalysis (with Eli Biham). https://en.wikipedia.org/wiki/Adi_Shamir
"shamir",
// Claude Shannon - The father of information theory and founder of digital circuit design theory. (https://en.wikipedia.org/wiki/Claude_Shannon)
"shannon",
// Carol Shaw - Originally an Atari employee, Carol Shaw is said to be the first female video game designer. https://en.wikipedia.org/wiki/Carol_Shaw_(video_game_designer)
"shaw",
// Dame Stephanie "Steve" Shirley - Founded a software company in 1962 employing women working from home. https://en.wikipedia.org/wiki/Steve_Shirley
"shirley",
// William Shockley co-invented the transistor - https://en.wikipedia.org/wiki/William_Shockley
"shockley",
// Lina Solomonovna Stern (or Shtern; Russian: Лина Соломоновна Штерн; 26 August 1878 7 March 1968) was a Soviet biochemist, physiologist and humanist whose medical discoveries saved thousands of lives at the fronts of World War II. She is best known for her pioneering work on bloodbrain barrier, which she described as hemato-encephalic barrier in 1921. https://en.wikipedia.org/wiki/Lina_Stern
"shtern",
// Françoise Barré-Sinoussi - French virologist and Nobel Prize Laureate in Physiology or Medicine; her work was fundamental in identifying HIV as the cause of AIDS. https://en.wikipedia.org/wiki/Fran%C3%A7oise_Barr%C3%A9-Sinoussi
"sinoussi",
// Betty Snyder - one of the original programmers of the ENIAC. https://en.wikipedia.org/wiki/ENIAC - https://en.wikipedia.org/wiki/Betty_Holberton
"snyder",
// Cynthia Solomon - Pioneer in the fields of artificial intelligence, computer science and educational computing. Known for creation of Logo, an educational programming language. https://en.wikipedia.org/wiki/Cynthia_Solomon
"solomon",
// Frances Spence - one of the original programmers of the ENIAC. https://en.wikipedia.org/wiki/ENIAC - https://en.wikipedia.org/wiki/Frances_Spence
"spence",
// Michael Stonebraker is a database research pioneer and architect of Ingres, Postgres, VoltDB and SciDB. Winner of 2014 ACM Turing Award. https://en.wikipedia.org/wiki/Michael_Stonebraker
"stonebraker",
// Ivan Edward Sutherland - American computer scientist and Internet pioneer, widely regarded as the father of computer graphics. https://en.wikipedia.org/wiki/Ivan_Sutherland
"sutherland",
// Janese Swanson (with others) developed the first of the Carmen Sandiego games. She went on to found Girl Tech. https://en.wikipedia.org/wiki/Janese_Swanson
"swanson",
// Aaron Swartz was influential in creating RSS, Markdown, Creative Commons, Reddit, and much of the internet as we know it today. He was devoted to freedom of information on the web. https://en.wikiquote.org/wiki/Aaron_Swartz
"swartz",
// Bertha Swirles was a theoretical physicist who made a number of contributions to early quantum theory. https://en.wikipedia.org/wiki/Bertha_Swirles
"swirles",
// Helen Brooke Taussig - American cardiologist and founder of the field of paediatric cardiology. https://en.wikipedia.org/wiki/Helen_B._Taussig
"taussig",
// Valentina Tereshkova is a Russian engineer, cosmonaut and politician. She was the first woman to fly to space in 1963. In 2013, at the age of 76, she offered to go on a one-way mission to Mars. https://en.wikipedia.org/wiki/Valentina_Tereshkova
"tereshkova",
// Nikola Tesla invented the AC electric system and every gadget ever used by a James Bond villain. https://en.wikipedia.org/wiki/Nikola_Tesla
"tesla",
// Marie Tharp - American geologist and oceanic cartographer who co-created the first scientific map of the Atlantic Ocean floor. Her work led to the acceptance of the theories of plate tectonics and continental drift. https://en.wikipedia.org/wiki/Marie_Tharp
"tharp",
// Ken Thompson - co-creator of UNIX and the C programming language - https://en.wikipedia.org/wiki/Ken_Thompson
"thompson",
// Linus Torvalds invented Linux and Git. https://en.wikipedia.org/wiki/Linus_Torvalds
"torvalds",
// Youyou Tu - Chinese pharmaceutical chemist and educator known for discovering artemisinin and dihydroartemisinin, used to treat malaria, which has saved millions of lives. Joint winner of the 2015 Nobel Prize in Physiology or Medicine. https://en.wikipedia.org/wiki/Tu_Youyou
"tu",
// Alan Turing was a founding father of computer science. https://en.wikipedia.org/wiki/Alan_Turing.
"turing",
// Varahamihira - Ancient Indian mathematician who discovered trigonometric formulae during 505-587 CE - https://en.wikipedia.org/wiki/Var%C4%81hamihira#Contributions
"varahamihira",
// Dorothy Vaughan was a NASA mathematician and computer programmer on the SCOUT launch vehicle program that put America's first satellites into space - https://en.wikipedia.org/wiki/Dorothy_Vaughan
"vaughan",
// Sir Mokshagundam Visvesvaraya - is a notable Indian engineer. He is a recipient of the Indian Republic's highest honour, the Bharat Ratna, in 1955. On his birthday, 15 September is celebrated as Engineer's Day in India in his memory - https://en.wikipedia.org/wiki/Visvesvaraya
"visvesvaraya",
// Christiane Nüsslein-Volhard - German biologist, won Nobel Prize in Physiology or Medicine in 1995 for research on the genetic control of embryonic development. https://en.wikipedia.org/wiki/Christiane_N%C3%BCsslein-Volhard
"volhard",
// Cédric Villani - French mathematician, won Fields Medal, Fermat Prize and Poincaré Price for his work in differential geometry and statistical mechanics. https://en.wikipedia.org/wiki/C%C3%A9dric_Villani
"villani",
// Marlyn Wescoff - one of the original programmers of the ENIAC. https://en.wikipedia.org/wiki/ENIAC - https://en.wikipedia.org/wiki/Marlyn_Meltzer
"wescoff",
// Sylvia B. Wilbur - British computer scientist who helped develop the ARPANET, was one of the first to exchange email in the UK and a leading researcher in computer-supported collaborative work. https://en.wikipedia.org/wiki/Sylvia_Wilbur
"wilbur",
// Andrew Wiles - Notable British mathematician who proved the enigmatic Fermat's Last Theorem - https://en.wikipedia.org/wiki/Andrew_Wiles
"wiles",
// Roberta Williams, did pioneering work in graphical adventure games for personal computers, particularly the King's Quest series. https://en.wikipedia.org/wiki/Roberta_Williams
"williams",
// Malcolm John Williamson - British mathematician and cryptographer employed by the GCHQ. Developed in 1974 what is now known as Diffie-Hellman key exchange (Diffie and Hellman first published the scheme in 1976). https://en.wikipedia.org/wiki/Malcolm_J._Williamson
"williamson",
// Sophie Wilson designed the first Acorn Micro-Computer and the instruction set for ARM processors. https://en.wikipedia.org/wiki/Sophie_Wilson
"wilson",
// Jeannette Wing - co-developed the Liskov substitution principle. - https://en.wikipedia.org/wiki/Jeannette_Wing
"wing",
// Steve Wozniak invented the Apple I and Apple II. https://en.wikipedia.org/wiki/Steve_Wozniak
"wozniak",
// The Wright brothers, Orville and Wilbur - credited with inventing and building the world's first successful airplane and making the first controlled, powered and sustained heavier-than-air human flight - https://en.wikipedia.org/wiki/Wright_brothers
"wright",
// Chien-Shiung Wu - Chinese-American experimental physicist who made significant contributions to nuclear physics. https://en.wikipedia.org/wiki/Chien-Shiung_Wu
"wu",
// Rosalyn Sussman Yalow - Rosalyn Sussman Yalow was an American medical physicist, and a co-winner of the 1977 Nobel Prize in Physiology or Medicine for development of the radioimmunoassay technique. https://en.wikipedia.org/wiki/Rosalyn_Sussman_Yalow
"yalow",
// Ada Yonath - an Israeli crystallographer, the first woman from the Middle East to win a Nobel prize in the sciences. https://en.wikipedia.org/wiki/Ada_Yonath
"yonath",
// Nikolay Yegorovich Zhukovsky (Russian: Никола́й Его́рович Жуко́вский, January 17 1847 March 17, 1921) was a Russian scientist, mathematician and engineer, and a founding father of modern aero- and hydrodynamics. Whereas contemporary scientists scoffed at the idea of human flight, Zhukovsky was the first to undertake the study of airflow. He is often called the Father of Russian Aviation. https://en.wikipedia.org/wiki/Nikolay_Yegorovich_Zhukovsky
"zhukovsky",
}
)
// getRandomName generates a random name from the list of adjectives and surnames in this package
// formatted as "adjective-surname", for example 'focused-turing'. If retry is greater than zero, a
// random integer in the range [0, 1000) is appended to the name, e.g. `focused-turing-42`.
func getRandomName(retry int) string {
	var name string
	for {
		name = fmt.Sprintf("%s-%s", left[rand.Intn(len(left))], right[rand.Intn(len(right))])
		// Steve Wozniak is not boring, so draw again when that pair comes up.
		if name != "boring-wozniak" {
			break
		}
	}

	if retry > 0 {
		name = fmt.Sprintf("%s-%d", name, rand.Intn(1000))
	}
	return name
}

View File

@ -0,0 +1,141 @@
package main
import (
"database/sql"
"sync"
"github.com/inconshreveable/log15"
_ "github.com/mattn/go-sqlite3"
)
// feederDB is a front to a sqlite DB that records which ownerRepo strings were processed, which orgs
// were created and whether processing of each ownerRepo succeeded or failed. The recorded state is
// what allows a subsequent run to skip work that was already done.
type feederDB struct {
// guards db: every method of feederDB locks this mutex before touching the DB
// (the original author notes that sqlite is not thread-safe)
sync.Mutex
// path of the DB file that was handed to sql.Open
path string
// the opened DB handle
db *sql.DB
// logger for this feeder DB (tagged with source=feederDB)
logger log15.Logger
}
// newFeederDB opens the sqlite DB at path and makes sure the two tables it relies on (repos and orgs)
// exist, creating them if necessary.
//
// It returns an error if the DB cannot be opened or if either table cannot be created; in the latter
// case the DB handle is closed again before returning.
func newFeederDB(path string) (*feederDB, error) {
	db, err := sql.Open("sqlite3", path)
	if err != nil {
		return nil, err
	}

	schema := []string{
		"CREATE TABLE IF NOT EXISTS repos (ownerRepo STRING PRIMARY KEY, org STRING, failed BOOLEAN, errType STRING, UNIQUE(ownerRepo, failed))",
		"CREATE TABLE IF NOT EXISTS orgs (name STRING PRIMARY KEY)",
	}
	for _, ddl := range schema {
		// Exec runs the one-off statement directly, so no prepared statement is left open.
		if _, err := db.Exec(ddl); err != nil {
			// the setup error is the one worth reporting; closing is best effort so the handle doesn't leak
			_ = db.Close()
			return nil, err
		}
	}

	return &feederDB{
		path:   path,
		db:     db,
		logger: log15.New("source", "feederDB"),
	}, nil
}
// declareRepo adds the ownerRepo to the DB when it gets pumped into the pipe and made available to the workers
// for processing. If ownerRepo is already present from a previous run, alreadyDone reports whether it can be
// skipped: true when it is not marked as failed, or when it failed with errType "clone" (not worth retrying).
// Any DB error is returned with alreadyDone set to false.
func (fdr *feederDB) declareRepo(ownerRepo string) (alreadyDone bool, err error) {
	fdr.Lock()
	defer fdr.Unlock()

	var failed bool
	// errType stays NULL until failed() records one (the INSERT below does not set it), and
	// database/sql refuses to scan NULL into a plain string, so a nullable type is required here.
	var errType sql.NullString

	err = fdr.db.QueryRow("SELECT failed, errType FROM repos WHERE ownerRepo=?", ownerRepo).Scan(&failed, &errType)
	if err == sql.ErrNoRows {
		// first time we see this ownerRepo: record it as declared and not failed.
		// Exec is used directly so no prepared statement is left open.
		if _, err := fdr.db.Exec("INSERT INTO repos(ownerRepo, failed) VALUES(?, FALSE)", ownerRepo); err != nil {
			return false, err
		}
		return false, nil
	}
	if err != nil {
		return false, err
	}

	// errType.String is "" when the column is NULL, so it can only equal "clone" for a recorded clone failure
	alreadyDone = !failed || errType.String == "clone"
	return alreadyDone, nil
}
// failed records the fact that the worker processing the specified ownerRepo failed to process it.
// errType is recorded because specific errTypes are not worth rerunning in a subsequent run (for example if repo is
// private on github.com and we don't have credentials for it, it's not worth trying again in a next run).
// It returns any error reported by the DB while updating the row.
func (fdr *feederDB) failed(ownerRepo string, errType string) error {
	fdr.Lock()
	defer fdr.Unlock()

	// Exec runs the statement in one call; preparing a statement on every call without closing it
	// would leave one open statement behind per processed repo.
	_, err := fdr.db.Exec("UPDATE repos SET failed = TRUE, errType = ? WHERE ownerRepo = ?", errType, ownerRepo)
	return err
}
// succeeded records that a worker has successfully processed the specified ownerRepo and stores the
// org the repo was added to. It returns any error reported by the DB while updating the row.
func (fdr *feederDB) succeeded(ownerRepo string, org string) error {
	fdr.Lock()
	defer fdr.Unlock()

	// Exec runs the statement in one call; preparing a statement on every call without closing it
	// would leave one open statement behind per processed repo.
	_, err := fdr.db.Exec("UPDATE repos SET failed = FALSE, org = ? WHERE ownerRepo = ?", org, ownerRepo)
	return err
}
// declareOrg adds a newly created org from one of the workers. Declaring an org that is already
// recorded is a no-op (INSERT OR IGNORE). It returns any error reported by the DB.
func (fdr *feederDB) declareOrg(org string) error {
	fdr.Lock()
	defer fdr.Unlock()

	// Exec runs the statement in one call; preparing a statement on every call without closing it
	// would leave one open statement behind per created org.
	_, err := fdr.db.Exec("INSERT OR IGNORE INTO orgs(name) VALUES(?)", org)
	return err
}

View File

@ -0,0 +1,201 @@
package main
import (
"bufio"
"context"
"flag"
"os"
"path/filepath"
"strings"
"github.com/inconshreveable/log15"
"github.com/prometheus/client_golang/prometheus"
"github.com/schollz/progressbar/v3"
)
// extractOwnerRepoFromCSVLine pulls the 'owner/repo' string out of one line of a repo report CSV
// generated by a GHE instance. Such a line has exactly 12 comma separated fields, for example:
// 2019-05-23 15:24:16 -0700,4,Organization,sourcegraph,9,tsenart-vegeta,public,1.64 MB,1683,0,false,false
// The sixth field (tsenart-vegeta in the example) holds owner and repo merged with a '-'; the first '-'
// is turned back into a '/' (the owner is the one on github.com, not the one in the GHE).
// An empty line or a line with any other number of fields yields the empty string.
func extractOwnerRepoFromCSVLine(line string) string {
	const (
		numFields      = 12
		ownerRepoField = 5 // zero-based index of the merged owner-repo column
	)

	if line == "" {
		return ""
	}
	fields := strings.Split(line, ",")
	if len(fields) != numFields {
		return ""
	}
	return strings.Replace(fields[ownerRepoField], "-", "/", 1)
}
// producer is pumping input line by line into the pipe channel for processing by the workers.
// It walks the inputs given on the command line (see pump) and consults the feeder DB for every
// line so that work already done in a previous run is not fed to the workers again.
type producer struct {
// how many lines may still be sent into the pipe; decremented on every successful send,
// pumping stops once it reaches zero
remaining int64
// where to send each ownerRepo. the workers expect 'owner/repo' strings
pipe chan<- string
// sqlite DB where each ownerRepo is declared (to keep progress and to implement resume functionality)
fdr *feederDB
// how many lines were skipped because the feeder DB reported them as already done
numAlreadyDone int64
// logger for the pump
logger log15.Logger
// terminal UI progress bar (advanced here for lines that are skipped as already done)
bar *progressbar.ProgressBar
// skips this many lines from the input before starting to feed into the pipe;
// counted down while pumping, so the skip carries over from one input file to the next
skipNumLines int64
}
// pumpFile reads the specified file line by line and feeds ownerRepo strings into the pipe.
//
// Lines are dropped while prdc.skipNumLines is still positive. Lines of a '.csv' file go through
// extractOwnerRepoFromCSVLine, lines of any other file are trimmed of surrounding whitespace and
// double quotes. Lines that end up empty are ignored. Every remaining line is declared in the feeder
// DB; lines reported as already done only update progress bar and metrics and are not sent to the workers.
//
// Pumping stops at the end of the file, when prdc.remaining reaches zero or when ctx is cancelled.
// In all of these cases the scanner's error (if any) is returned; a failure to open the file or to
// declare a repo in the DB is returned as is.
func (prdc *producer) pumpFile(ctx context.Context, path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	csvInput := strings.HasSuffix(path, ".csv")
	sc := bufio.NewScanner(f)

	for sc.Scan() && prdc.remaining > 0 {
		// still inside the prefix of the input the user asked to skip
		if prdc.skipNumLines > 0 {
			prdc.skipNumLines--
			continue
		}

		ownerRepo := strings.TrimSpace(sc.Text())
		if csvInput {
			ownerRepo = extractOwnerRepoFromCSVLine(ownerRepo)
		} else {
			ownerRepo = strings.Trim(ownerRepo, "\"")
		}
		if ownerRepo == "" {
			continue
		}

		skip, err := prdc.fdr.declareRepo(ownerRepo)
		if err != nil {
			return err
		}
		if skip {
			// handled in a previous run: account for it as processed without involving a worker
			prdc.numAlreadyDone++
			_ = prdc.bar.Add(1)
			reposAlreadyDoneCounter.Inc()
			reposProcessedCounter.With(prometheus.Labels{"worker": "skipped"}).Inc()
			reposSucceededCounter.Inc()
			remainingWorkGauge.Add(-1.0)
			prdc.logger.Debug("repo already done in previous run", "owner/repo", ownerRepo)
			continue
		}

		// hand the line to a worker, unless we're asked to stop while waiting for one
		select {
		case prdc.pipe <- ownerRepo:
			prdc.remaining--
		case <-ctx.Done():
			return sc.Err()
		}
	}
	return sc.Err()
}
// pump goes through all the inputs given as command line arguments, recursively walking directories,
// and pumps every '*.csv', '*.txt' and '*.json' file it finds (see pumpFile). It stops early, without
// an error, once ctx is cancelled or prdc.remaining has dropped to zero. Errors from walking the
// inputs or from pumping a file abort the walk and are returned.
func (prdc *producer) pump(ctx context.Context) error {
	visit := func(path string, info os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case info.IsDir(), prdc.remaining <= 0:
			// nothing to pump for directories, and nothing left to pump at all once remaining hits zero
			return nil
		}

		switch filepath.Ext(path) {
		case ".csv", ".txt", ".json":
			return prdc.pumpFile(ctx, path)
		}
		return nil
	}

	for _, root := range flag.Args() {
		if ctx.Err() != nil || prdc.remaining <= 0 {
			return nil
		}
		if err := filepath.Walk(root, visit); err != nil {
			return err
		}
	}
	return nil
}
// numLinesInFile counts how many lines are in the specified file, not counting the first skipNumLines
// lines. It returns the number of counted lines, the number of lines that were skipped (which is less
// than skipNumLines when the file is shorter than that) and any error from opening or scanning the file.
// A negative skipNumLines is never reached, so every line is then reported as skipped.
func numLinesInFile(path string, skipNumLines int64) (int64, int64, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, 0, err
	}
	defer f.Close()

	var counted, skipped int64
	skipping := skipNumLines != 0

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		if !skipping {
			counted++
			continue
		}
		skipped++
		// keep skipping until exactly skipNumLines lines have been passed over
		skipping = skipped != skipNumLines
	}
	return counted, skipped, sc.Err()
}
// numLinesTotal goes through all the inputs and counts how many lines are available for processing.
func numLinesTotal(skipNumLines int64) (int64, error) {
var numLines int64
skippedLines := skipNumLines
for _, root := range flag.Args() {
err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() {
return nil
}
if strings.HasSuffix(path, ".csv") || strings.HasSuffix(path, ".txt") ||
strings.HasSuffix(path, ".json") {
nl, sl, err := numLinesInFile(path, skippedLines)
if err != nil {
return err
}
numLines += nl
skippedLines -= sl
}
return nil
})
if err != nil {
return 0, err
}
}
return numLines, nil
}

View File

@ -0,0 +1,334 @@
package main
import (
"context"
"errors"
"fmt"
"math/rand"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
"github.com/google/go-github/v31/github"
"github.com/inconshreveable/log15"
"github.com/prometheus/client_golang/prometheus"
"github.com/schollz/progressbar/v3"
"golang.org/x/oauth2"
"golang.org/x/time/rate"
)
// newGHEClient builds a GitHub Enterprise API client for the given base and upload URLs. The client
// uses an OAuth2 HTTP client backed by a static token source holding the supplied access token.
func newGHEClient(ctx context.Context, baseURL, uploadURL, token string) (*github.Client, error) {
	tokenSource := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})
	httpClient := oauth2.NewClient(ctx, tokenSource)
	return github.NewEnterpriseClient(baseURL, uploadURL, httpClient)
}
// init seeds the shared math/rand generator with the current time, so the random org names and
// sizes generated in this package are not the same sequence on every run.
func init() {
rand.Seed(time.Now().UnixNano())
}
// randomOrgNameAndSize returns a random name for an org together with the random number of repos
// the org should receive. The size is drawn from [0, 500) and raised to 5 if it comes out smaller;
// it is also appended to the name ("<random name>-<size>"). Names are random, so uniqueness is
// likely but not enforced here.
func randomOrgNameAndSize() (string, int) {
	const (
		minRepos   = 5
		reposLimit = 500 // exclusive upper bound of the draw
	)

	size := rand.Intn(reposLimit)
	if size < minRepos {
		size = minRepos
	}
	return fmt.Sprintf("%s-%d", getRandomName(0), size), size
}
// feederError is an error that occurred while processing an ownerRepo line. errType sorts the errors
// into 4 major categories used in metrics and logging: api, clone, push and unknown.
type feederError struct {
	// one of: api, clone, push, unknown
	errType string
	// underlying error
	err error
}

// Error renders the error as "<errType>: <underlying error>".
func (fe *feederError) Error() string {
	return fmt.Sprintf("%v: %v", fe.errType, fe.err)
}

// Unwrap returns the underlying error so that errors.Is and errors.As can see through a feederError.
func (fe *feederError) Unwrap() error {
	return fe.err
}
// worker processes ownerRepo strings, feeding them to the GHE instance. it declares orgs if needed, clones from
// github.com, adds GHE as a remote, declares the repo in GHE through the API and does a git push to the GHE.
// there are many workers working at the same time, taking work from a work channel fed by a pump that reads lines
// from the input.
type worker struct {
// used in logs and metrics
name string
// index of the worker (which one in range [0, numWorkers))
index int
// directory to use for cloning from github.com (clones end up in <scratchDir>/<owner>/<repo>)
scratchDir string
// GHE API client
client *github.Client
// admin user handed to the GHE API when creating an org
admin string
// token embedded in the URL of the GHE remote that is added to each cloned repo
token string
// gets the lines of work from this channel (each line is expected to be an 'owner/repo' string)
work <-chan string
// wait group to decrement when this worker is done working
wg *sync.WaitGroup
// terminal UI progress bar
bar *progressbar.ProgressBar
// some stats
numFailed int64
numSucceeded int64
// feeder DB is a sqlite DB, worker marks processed ownerRepos as successfully processed or failed
fdr *feederDB
// keeps track of org to which to add repos
// (when currentNumRepos reaches currentMaxRepos, it generates a new random org name and max size and
// starts counting from zero again)
currentOrg string
currentNumRepos int
currentMaxRepos int
// logger has worker name imprinted
logger log15.Logger
// rate limiter for the GHE API calls
rateLimiter *rate.Limiter
// how many simultaneous `git push` operations to the GHE
pushSem chan struct{}
// how many simultaneous `git clone` operations from github.com
cloneSem chan struct{}
// how many times process attempts the `git push` to the GHE before giving up on a repo
// (despite the name, cloning itself is attempted only once)
numCloningAttempts int
// how long to wait before cutting short a cloning from github.com
// (the same timeout is applied to each `git push` to the GHE)
cloneRepoTimeout time.Duration
// host to add as a remote to a cloned repo pointing to GHE instance
host string
}
// run spins until the work channel closes or the context is cancelled, and signals the wait group
// when it returns.
//
// It first creates an org to add repos to. Then, for every 'owner/repo' line taken from the work
// channel, it processes the repo (see process), updates the metrics, records success or failure in
// the feeder DB, switches to a fresh org once the current one has received its share of repos and
// finally removes the owner's directory from the scratch dir. Lines that are not of the form
// 'owner/repo' are logged and skipped.
func (wkr *worker) run(ctx context.Context) {
	defer wkr.wg.Done()

	// declare the first org to start the worker processing
	wkr.switchToNewOrg(ctx)

	for line := range wkr.work {
		// the progress bar is purely cosmetic, a failure to update it is not worth acting on
		_ = wkr.bar.Add(1)

		if ctx.Err() != nil {
			return
		}

		xs := strings.Split(line, "/")
		if len(xs) != 2 {
			wkr.logger.Error("failed to split line", "line", line)
			continue
		}
		owner, repo := xs[0], xs[1]

		// process one owner/repo
		err := wkr.process(ctx, owner, repo)
		reposProcessedCounter.With(prometheus.Labels{"worker": wkr.name}).Inc()
		remainingWorkGauge.Add(-1.0)

		if err != nil {
			wkr.numFailed++
			// errors that don't come from one of the processing steps (e.g. a cancelled context) are "unknown"
			errType := "unknown"
			var ferr *feederError
			if errors.As(err, &ferr) {
				errType = ferr.errType
			}
			reposFailedCounter.With(prometheus.Labels{"worker": wkr.name, "err_type": errType}).Inc()
			// an unrecorded failure makes the next run treat the repo as done, so at least leave a trace
			if err := wkr.fdr.failed(line, errType); err != nil {
				wkr.logger.Error("failed to mark failed repo", "ownerRepo", line, "error", err)
			}
		} else {
			reposSucceededCounter.Inc()
			wkr.numSucceeded++
			wkr.currentNumRepos++

			if err := wkr.fdr.succeeded(line, wkr.currentOrg); err != nil {
				wkr.logger.Error("failed to mark succeeded repo", "ownerRepo", line, "error", err)
			}

			// switch to a new org
			if wkr.currentNumRepos >= wkr.currentMaxRepos {
				wkr.switchToNewOrg(ctx)
			}
		}

		// clean up clone on disk
		// NOTE(review): this removes the whole owner directory; if several workers share scratchDir and
		// work on repos of the same owner at the same time this would also remove the other worker's
		// clone — confirm how scratchDir is assigned.
		ownerDir := filepath.Join(wkr.scratchDir, owner)
		if err := os.RemoveAll(ownerDir); err != nil {
			wkr.logger.Error("failed to clean up cloned repo", "ownerRepo", line, "error", err, "ownerDir", ownerDir)
		}
	}
}

// switchToNewOrg picks a new random org name and size, resets the count of repos added to the current
// org, creates the org in the GHE and records it in the feeder DB. If the org cannot be created,
// currentOrg is set to the empty string so that subsequent repos are added without an org.
func (wkr *worker) switchToNewOrg(ctx context.Context) {
	wkr.currentOrg, wkr.currentMaxRepos = randomOrgNameAndSize()
	wkr.currentNumRepos = 0
	wkr.logger.Debug("switching to org", "org", wkr.currentOrg)

	if err := wkr.addGHEOrg(ctx); err != nil {
		wkr.logger.Error("failed to create org", "org", wkr.currentOrg, "error", err)
		// add it to default org then
		wkr.currentOrg = ""
		return
	}

	if err := wkr.fdr.declareOrg(wkr.currentOrg); err != nil {
		wkr.logger.Error("failed to declare org", "org", wkr.currentOrg, "error", err)
	}
}
// process does the necessary work for one ownerRepo string: clone, declare repo in GHE through API,
// add remote and push.
//
// A failing step is logged and reported as a *feederError whose errType names the category: "clone"
// for the clone, "api" for creating the GHE repo and for adding the remote, "push" when none of the
// wkr.numCloningAttempts push attempts succeeded. If the context is cancelled while pushing, the
// context's error is returned unwrapped instead.
func (wkr *worker) process(ctx context.Context, owner, repo string) error {
	if err := wkr.cloneRepo(ctx, owner, repo); err != nil {
		wkr.logger.Error("failed to clone repo", "owner", owner, "repo", repo, "error", err)
		return &feederError{errType: "clone", err: err}
	}

	gheRepo, err := wkr.addGHERepo(ctx, owner, repo)
	if err != nil {
		wkr.logger.Error("failed to create GHE repo", "owner", owner, "repo", repo, "error", err)
		return &feederError{errType: "api", err: err}
	}

	if err := wkr.addRemote(ctx, gheRepo, owner, repo); err != nil {
		wkr.logger.Error("failed to add GHE as a remote in cloned repo", "owner", owner, "repo", repo, "error", err)
		return &feederError{errType: "api", err: err}
	}

	// retry the push a limited number of times, giving up right away once the context is done
	var pushErr error
	for attempt := 1; attempt <= wkr.numCloningAttempts && ctx.Err() == nil; attempt++ {
		if pushErr = wkr.pushToGHE(ctx, owner, repo); pushErr == nil {
			return nil
		}
		wkr.logger.Error("failed to push cloned repo to GHE", "attempt", attempt, "owner", owner, "repo", repo, "error", pushErr)
	}

	if ctxErr := ctx.Err(); ctxErr != nil {
		return ctxErr
	}
	return &feederError{errType: "push", err: pushErr}
}
// cloneRepo clones the specified repo from github.com into <scratchDir>/<owner>. It first waits for a
// free slot in cloneSem (returning the context's error if ctx is done before one frees up) and gives
// the `git clone` at most wkr.cloneRepoTimeout to finish.
func (wkr *worker) cloneRepo(ctx context.Context, owner, repo string) error {
	// limit how many clones run at the same time
	select {
	case wkr.cloneSem <- struct{}{}:
	case <-ctx.Done():
		return ctx.Err()
	}
	defer func() { <-wkr.cloneSem }()

	ownerDir := filepath.Join(wkr.scratchDir, owner)
	if err := os.MkdirAll(ownerDir, 0777); err != nil {
		wkr.logger.Error("failed to create owner dir", "ownerDir", ownerDir, "error", err)
		return err
	}

	cloneCtx, cancel := context.WithTimeout(ctx, wkr.cloneRepoTimeout)
	defer cancel()

	repoURL := fmt.Sprintf("https://github.com/%s/%s", owner, repo)
	cmd := exec.CommandContext(cloneCtx, "git", "clone", repoURL)
	cmd.Dir = ownerDir
	// NOTE(review): cmd.Env starts out empty, so git runs with GIT_ASKPASS as its only environment
	// variable rather than with the parent's environment plus GIT_ASKPASS — confirm this is intended.
	cmd.Env = append(cmd.Env, "GIT_ASKPASS=/bin/echo")
	return cmd.Run()
}
// addRemote declares the GHE as a remote named 'ghe' in the cloned repo at <scratchDir>/<owner>/<repo>.
// The remote URL points at the full name of the repo created in the GHE on wkr.host and carries
// wkr.token as the user part of the URL.
func (wkr *worker) addRemote(ctx context.Context, gheRepo *github.Repository, owner, repo string) error {
	cmd := exec.CommandContext(ctx, "git", "remote", "add", "ghe",
		fmt.Sprintf("https://%s@%s/%s.git", wkr.token, wkr.host, *gheRepo.FullName))
	cmd.Dir = filepath.Join(wkr.scratchDir, owner, repo)
	return cmd.Run()
}
// pushToGHE does a `git push` of the master branch to the 'ghe' remote from the cloned repo at
// <scratchDir>/<owner>/<repo>. It first waits for a free slot in pushSem (returning the context's
// error if ctx is done before one frees up) and gives the push at most wkr.cloneRepoTimeout to finish.
func (wkr *worker) pushToGHE(ctx context.Context, owner, repo string) error {
	// limit how many pushes run at the same time
	select {
	case wkr.pushSem <- struct{}{}:
	case <-ctx.Done():
		return ctx.Err()
	}
	defer func() { <-wkr.pushSem }()

	// the clone timeout doubles as the push timeout
	pushCtx, cancel := context.WithTimeout(ctx, wkr.cloneRepoTimeout)
	defer cancel()

	// NOTE(review): only 'master' is pushed; presumably a repo whose default branch has another
	// name fails here — confirm whether that matters for the inputs used.
	cmd := exec.CommandContext(pushCtx, "git", "push", "ghe", "master")
	cmd.Dir = filepath.Join(wkr.scratchDir, owner, repo)
	return cmd.Run()
}
// addGHEOrg uses the GHE API to declare the org named wkr.currentOrg at the GHE, with wkr.admin as
// its admin. The call waits for a spot from the rate limiter first and is given 30 seconds to finish.
func (wkr *worker) addGHEOrg(ctx context.Context) error {
	if err := wkr.rateLimiter.Wait(ctx); err != nil {
		wkr.logger.Error("failed to get a request spot from rate limiter", "error", err)
		return err
	}

	apiCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	org := &github.Organization{Login: github.String(wkr.currentOrg)}
	_, _, err := wkr.client.Admin.CreateOrg(apiCtx, org, wkr.admin)
	return err
}
// addGHERepo uses the GHE API to declare the repo at the GHE. The repo is named '<owner>-<repo>' and
// is created in wkr.currentOrg. The call waits for a spot from the rate limiter first and is given
// 30 seconds to finish. It returns the repo as reported back by the GHE.
func (wkr *worker) addGHERepo(ctx context.Context, owner, repo string) (*github.Repository, error) {
	if err := wkr.rateLimiter.Wait(ctx); err != nil {
		wkr.logger.Error("failed to get a request spot from rate limiter", "error", err)
		return nil, err
	}

	apiCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	wanted := &github.Repository{Name: github.String(fmt.Sprintf("%s-%s", owner, repo))}
	created, _, err := wkr.client.Repositories.Create(apiCtx, wkr.currentOrg, wanted)
	return created, err
}