From b202af3cf236e3b3d78503aa97412a7b10536ac2 Mon Sep 17 00:00:00 2001
From: Henry Oswald
Date: Mon, 5 Mar 2018 11:02:31 +0000
Subject: [PATCH] add Docker runner to the core codebase

Supports both the local command runner and the Docker runner, and adds
Docker files for TeX Live. Also fixes the tests so they exit correctly,
and removes leftover debug lines.
---
 app/coffee/CommandRunner.coffee               |  49 +-
 app/coffee/CompileManager.coffee              |   5 +-
 app/coffee/DockerLockManager.coffee           |  56 ++
 app/coffee/DockerRunner.coffee                | 348 ++++++++++++
 app/coffee/LatexRunner.coffee                 |   2 +-
 app/coffee/LocalCommandRunner.coffee          |  44 ++
 config/settings.defaults.coffee               |   4 +-
 docker-compose-config.yml                     |   6 +-
 package.json                                  |   2 +-
 .../coffee/SimpleLatexFileTests.coffee        |   1 -
 test/acceptance/coffee/TimeoutTests.coffee    |   1 -
 test/acceptance/coffee/helpers/Client.coffee  |   1 -
 test/unit/coffee/CompileManagerTests.coffee   |   2 -
 .../unit/coffee/DockerLockManagerTests.coffee | 145 +++++
 test/unit/coffee/DockerRunnerTests.coffee     | 499 ++++++++++++++++++
 ...Manager.coffee => LockManagerTests.coffee} |   2 +-
 16 files changed, 1108 insertions(+), 59 deletions(-)
 create mode 100644 app/coffee/DockerLockManager.coffee
 create mode 100644 app/coffee/DockerRunner.coffee
 create mode 100644 app/coffee/LocalCommandRunner.coffee
 create mode 100644 test/unit/coffee/DockerLockManagerTests.coffee
 create mode 100644 test/unit/coffee/DockerRunnerTests.coffee
 rename test/unit/coffee/{LockManager.coffee => LockManagerTests.coffee} (98%)

diff --git a/app/coffee/CommandRunner.coffee b/app/coffee/CommandRunner.coffee
index f47af00..2d1c3a9 100644
--- a/app/coffee/CommandRunner.coffee
+++ b/app/coffee/CommandRunner.coffee
@@ -1,44 +1,11 @@
-spawn = require("child_process").spawn
+Settings = require "settings-sharelatex"
 logger = require "logger-sharelatex"
-logger.info "using standard command runner"
+if Settings.clsi?.dockerRunner == true
+	commandRunnerPath = "./DockerRunner"
+else
+	commandRunnerPath = "./LocalCommandRunner"
+logger.info commandRunnerPath:commandRunnerPath, "selecting command runner for clsi"
+CommandRunner = require(commandRunnerPath)
-
-module.exports = CommandRunner =
-	run: (project_id, command, directory, image, timeout, environment, callback = (error) ->) ->
-		command = (arg.replace('$COMPILE_DIR', directory) for arg in command)
-		logger.log project_id: project_id, command: command, directory: directory, "running command"
-		logger.warn "timeouts and sandboxing are not enabled with CommandRunner"
-
-		# merge environment settings
-		env = {}
-		env[key] = value for key, value of process.env
-		env[key] = value for key, value of environment
-
-		# run command as detached process so it has its own process group (which can be killed if needed)
-		proc = spawn command[0], command.slice(1), stdio: "inherit", cwd: directory, detached: true, env: env
-
-		proc.on "error", (err)->
-			logger.err err:err, project_id:project_id, command: command, directory: directory, "error running command"
-			callback(err)
-
-		proc.on "close", (code, signal) ->
-			logger.info code:code, signal:signal, project_id:project_id, "command exited"
-			if signal is 'SIGTERM' # signal from kill method below
-				err = new Error("terminated")
-				err.terminated = true
-				return callback(err)
-			else if code is 1 # exit status from chktex
-				err = new Error("exited")
-				err.code = code
-				return callback(err)
-			else
-				callback()
-
-		return proc.pid # return process id to allow job to be killed if necessary
-
-	kill: (pid, callback = (error) ->) ->
-		try
-			process.kill -pid # kill all processes in group
-		catch err
- return callback(err) - callback() +module.exports = CommandRunner diff --git a/app/coffee/CompileManager.coffee b/app/coffee/CompileManager.coffee index 0db15e5..807b8c4 100644 --- a/app/coffee/CompileManager.coffee +++ b/app/coffee/CompileManager.coffee @@ -15,10 +15,7 @@ fse = require "fs-extra" os = require("os") async = require "async" Errors = require './Errors' - -commandRunner = Settings.clsi?.commandRunner or "./CommandRunner" -logger.info commandRunner:commandRunner, "selecting command runner for clsi" -CommandRunner = require(commandRunner) +CommandRunner = require "./CommandRunner" getCompileName = (project_id, user_id) -> if user_id? then "#{project_id}-#{user_id}" else project_id diff --git a/app/coffee/DockerLockManager.coffee b/app/coffee/DockerLockManager.coffee new file mode 100644 index 0000000..739f2cd --- /dev/null +++ b/app/coffee/DockerLockManager.coffee @@ -0,0 +1,56 @@ +logger = require "logger-sharelatex" + +LockState = {} # locks for docker container operations, by container name + +module.exports = LockManager = + + MAX_LOCK_HOLD_TIME: 15000 # how long we can keep a lock + MAX_LOCK_WAIT_TIME: 10000 # how long we wait for a lock + LOCK_TEST_INTERVAL: 1000 # retry time + + tryLock: (key, callback = (err, gotLock) ->) -> + existingLock = LockState[key] + if existingLock? # the lock is already taken, check how old it is + lockAge = Date.now() - existingLock.created + if lockAge < LockManager.MAX_LOCK_HOLD_TIME + return callback(null, false) # we didn't get the lock, bail out + else + logger.error {key: key, lock: existingLock, age:lockAge}, "taking old lock by force" + # take the lock + LockState[key] = lockValue = {created: Date.now()} + callback(null, true, lockValue) + + getLock: (key, callback = (error, lockValue) ->) -> + startTime = Date.now() + do attempt = () -> + LockManager.tryLock key, (error, gotLock, lockValue) -> + return callback(error) if error? + if gotLock + callback(null, lockValue) + else if Date.now() - startTime > LockManager.MAX_LOCK_WAIT_TIME + e = new Error("Lock timeout") + e.key = key + return callback(e) + else + setTimeout attempt, LockManager.LOCK_TEST_INTERVAL + + releaseLock: (key, lockValue, callback = (error) ->) -> + existingLock = LockState[key] + if existingLock is lockValue # lockValue is an object, so we can test by reference + delete LockState[key] # our lock, so we can free it + callback() + else if existingLock? # lock exists but doesn't match ours + logger.error {key:key, lock: existingLock}, "tried to release lock taken by force" + callback() + else + logger.error {key:key, lock: existingLock}, "tried to release lock that has gone" + callback() + + runWithLock: (key, runner = ( (releaseLock = (error) ->) -> ), callback = ( (error) -> )) -> + LockManager.getLock key, (error, lockValue) -> + return callback(error) if error? + runner (error1, args...) -> + LockManager.releaseLock key, lockValue, (error2) -> + error = error1 or error2 + return callback(error) if error? + callback(null, args...) 
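
Note: runWithLock above serialises container operations on a per-key basis. It acquires the lock (taking it by force if a holder exceeds MAX_LOCK_HOLD_TIME), runs the supplied function, releases the lock, and forwards the runner's results to the final callback. A minimal usage sketch, assuming a hypothetical doDockerWork helper; the key here is made up, but would normally be a container name:

	LockManager = require "./DockerLockManager"

	LockManager.runWithLock "project-123-fingerprint", (releaseLock) ->
		# doDockerWork is a hypothetical async container operation
		doDockerWork (error, result) ->
			# releasing the lock forwards (error, result) to the final callback
			releaseLock(error, result)
	, (error, result) ->
		# reached once the lock has been released, or with a "Lock timeout"
		# error if the lock was not acquired within MAX_LOCK_WAIT_TIME
		console.log error, result
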
diff --git a/app/coffee/DockerRunner.coffee b/app/coffee/DockerRunner.coffee new file mode 100644 index 0000000..f14b5c6 --- /dev/null +++ b/app/coffee/DockerRunner.coffee @@ -0,0 +1,348 @@ +Settings = require "settings-sharelatex" +logger = require "logger-sharelatex" +Docker = require("dockerode") +dockerode = new Docker() +crypto = require "crypto" +async = require "async" +LockManager = require "./DockerLockManager" +fs = require "fs" +Path = require 'path' +_ = require "underscore" + +logger.info "using docker runner" + +usingSiblingContainers = () -> + Settings?.path?.sandboxedCompilesHostDir? + +module.exports = DockerRunner = + ERR_NOT_DIRECTORY: new Error("not a directory") + ERR_TERMINATED: new Error("terminated") + ERR_EXITED: new Error("exited") + ERR_TIMED_OUT: new Error("container timed out") + + run: (project_id, command, directory, image, timeout, environment, callback = (error, output) ->) -> + + if usingSiblingContainers() + _newPath = Settings.path.sandboxedCompilesHostDir + logger.log {path: _newPath}, "altering bind path for sibling containers" + # Server Pro, example: + # '/var/lib/sharelatex/data/compiles/' + # ... becomes ... + # '/opt/sharelatex_data/data/compiles/' + directory = Path.join(Settings.path.sandboxedCompilesHostDir, Path.basename(directory)) + + volumes = {} + volumes[directory] = "/compile" + + command = (arg.toString().replace?('$COMPILE_DIR', "/compile") for arg in command) + if !image? + image = Settings.clsi.docker.image + + options = DockerRunner._getContainerOptions(command, image, volumes, timeout, environment) + fingerprint = DockerRunner._fingerprintContainer(options) + options.name = name = "project-#{project_id}-#{fingerprint}" + + logger.log project_id: project_id, options: options, "running docker container" + DockerRunner._runAndWaitForContainer options, volumes, timeout, (error, output) -> + if error?.message?.match("HTTP code is 500") + logger.log err: error, project_id: project_id, "error running container so destroying and retrying" + DockerRunner.destroyContainer name, null, true, (error) -> + return callback(error) if error? + DockerRunner._runAndWaitForContainer options, volumes, timeout, callback + else + callback(error, output) + + return name # pass back the container name to allow it to be killed + + kill: (container_id, callback = (error) ->) -> + logger.log container_id: container_id, "sending kill signal to container" + container = dockerode.getContainer(container_id) + container.kill (error) -> + if error? and error?.message?.match?(/Cannot kill container .* is not running/) + logger.warn err: error, container_id: container_id, "container not running, continuing" + error = null + if error? + logger.error err: error, container_id: container_id, "error killing container" + return callback(error) + else + callback() + + _runAndWaitForContainer: (options, volumes, timeout, _callback = (error, output) ->) -> + callback = (args...) -> + _callback(args...) + # Only call the callback once + _callback = () -> + + name = options.name + + streamEnded = false + containerReturned = false + output = {} + + callbackIfFinished = () -> + if streamEnded and containerReturned + callback(null, output) + + attachStreamHandler = (error, _output) -> + return callback(error) if error? + output = _output + streamEnded = true + callbackIfFinished() + + DockerRunner.startContainer options, volumes, attachStreamHandler, (error, containerId) -> + return callback(error) if error? 
+
+			DockerRunner.waitForContainer name, timeout, (error, exitCode) ->
+				return callback(error) if error?
+				if exitCode is 137 # exit status from kill -9
+					err = DockerRunner.ERR_TERMINATED
+					err.terminated = true
+					return callback(err)
+				if exitCode is 1 # exit status from chktex
+					err = DockerRunner.ERR_EXITED
+					err.code = exitCode
+					return callback(err)
+				containerReturned = true
+				callbackIfFinished()
+
+	_getContainerOptions: (command, image, volumes, timeout, environment) ->
+		timeoutInSeconds = timeout / 1000
+
+		if Settings.path?.synctexBinHostPath?
+			volumes[Settings.path.synctexBinHostPath] = "/opt/synctex:ro"
+
+		dockerVolumes = {}
+		for hostVol, dockerVol of volumes
+			dockerVolumes[dockerVol] = {}
+
+			if volumes[hostVol].slice(-3).indexOf(":r") == -1
+				volumes[hostVol] = "#{dockerVol}:rw"
+
+
+		# merge settings and environment parameter
+		env = {}
+		for src in [Settings.clsi.docker.env, environment or {}]
+			env[key] = value for key, value of src
+		# set the path based on the image year
+		if m = image.match /:([0-9]+)\.[0-9]+/
+			year = m[1]
+		else
+			year = "2014"
+		env['PATH'] = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/texlive/#{year}/bin/x86_64-linux/"
+		options =
+			"Cmd"             : command,
+			"Image"           : image
+			"Volumes"         : dockerVolumes
+			"WorkingDir"      : "/compile"
+			"NetworkDisabled" : true
+			"Memory"          : 1024 * 1024 * 1024 * 1024 # 1 TB
+			"User"            : Settings.clsi.docker.user
+			"Env"             : ("#{key}=#{value}" for key, value of env) # convert the environment hash to an array
+			"HostConfig"      :
+				"Binds": ("#{hostVol}:#{dockerVol}" for hostVol, dockerVol of volumes)
+				"LogConfig": {"Type": "none", "Config": {}}
+				"Ulimits": [{'Name': 'cpu', 'Soft': timeoutInSeconds+5, 'Hard': timeoutInSeconds+10}]
+				"CapDrop": "ALL"
+				"SecurityOpt": ["no-new-privileges"]
+		if Settings.clsi.docker.seccomp_profile?
+			options.HostConfig.SecurityOpt.push "seccomp=#{Settings.clsi.docker.seccomp_profile}"
+		return options
+
+	_fingerprintContainer: (containerOptions) ->
+		# Yay, Hashing!
+		json = JSON.stringify(containerOptions)
+		return crypto.createHash("md5").update(json).digest("hex")
+
+	startContainer: (options, volumes, attachStreamHandler, callback) ->
+		LockManager.runWithLock options.name, (releaseLock) ->
+			# Check that volumes exist before starting the container.
+			# When a container is started with volume pointing to a
+			# non-existent directory then docker creates the directory but
+			# with root ownership.
+			DockerRunner._checkVolumes options, volumes, (err) ->
+				return releaseLock(err) if err?
+				DockerRunner._startContainer options, volumes, attachStreamHandler, releaseLock
+		, callback
+
+	# Check that volumes exist and are directories
+	_checkVolumes: (options, volumes, callback = (error, containerName) ->) ->
+		if usingSiblingContainers()
+			# Server Pro, with sibling-containers active, skip checks
+			return callback(null)
+
+		checkVolume = (path, cb) ->
+			fs.stat path, (err, stats) ->
+				return cb(err) if err?
+				return cb(DockerRunner.ERR_NOT_DIRECTORY) if not stats?.isDirectory()
+				cb()
+		jobs = []
+		for vol of volumes
+			do (vol) ->
+				jobs.push (cb) -> checkVolume(vol, cb)
+		async.series jobs, callback
+
+	_startContainer: (options, volumes, attachStreamHandler, callback = ((error, output) ->)) ->
+		callback = _.once(callback)
+		name = options.name
+
+		logger.log {container_name: name}, "starting container"
+		container = dockerode.getContainer(name)
+
+		createAndStartContainer = ->
+			dockerode.createContainer options, (error, container) ->
+				return callback(error) if error?
+ startExistingContainer() + + startExistingContainer = -> + DockerRunner.attachToContainer options.name, attachStreamHandler, (error)-> + return callback(error) if error? + container.start (error) -> + if error? and error?.statusCode != 304 #already running + return callback(error) + else + callback() + + container.inspect (error, stats)-> + if error?.statusCode == 404 + createAndStartContainer() + else if error? + logger.err {container_name: name}, "unable to inspect container to start" + return callback(error) + else + startExistingContainer() + + + attachToContainer: (containerId, attachStreamHandler, attachStartCallback) -> + container = dockerode.getContainer(containerId) + container.attach {stdout: 1, stderr: 1, stream: 1}, (error, stream) -> + if error? + logger.error err: error, container_id: containerId, "error attaching to container" + return attachStartCallback(error) + else + attachStartCallback() + + + logger.log container_id: containerId, "attached to container" + + MAX_OUTPUT = 1024 * 1024 # limit output to 1MB + createStringOutputStream = (name) -> + return { + data: "" + overflowed: false + write: (data) -> + return if @overflowed + if @data.length < MAX_OUTPUT + @data += data + else + logger.error container_id: containerId, length: @data.length, maxLen: MAX_OUTPUT, "#{name} exceeds max size" + @data += "(...truncated at #{MAX_OUTPUT} chars...)" + @overflowed = true + # kill container if too much output + # docker.containers.kill(containerId, () ->) + } + + stdout = createStringOutputStream "stdout" + stderr = createStringOutputStream "stderr" + + container.modem.demuxStream(stream, stdout, stderr) + + stream.on "error", (err) -> + logger.error err: err, container_id: containerId, "error reading from container stream" + + stream.on "end", () -> + attachStreamHandler null, {stdout: stdout.data, stderr: stderr.data} + + waitForContainer: (containerId, timeout, _callback = (error, exitCode) ->) -> + callback = (args...) -> + _callback(args...) + # Only call the callback once + _callback = () -> + + container = dockerode.getContainer(containerId) + + timedOut = false + timeoutId = setTimeout () -> + timedOut = true + logger.log container_id: containerId, "timeout reached, killing container" + container.kill(() ->) + , timeout + + logger.log container_id: containerId, "waiting for docker container" + container.wait (error, res) -> + if error? + clearTimeout timeoutId + logger.error err: error, container_id: containerId, "error waiting for container" + return callback(error) + if timedOut + logger.log containerId: containerId, "docker container timed out" + error = DockerRunner.ERR_TIMED_OUT + error.timedout = true + callback error + else + clearTimeout timeoutId + logger.log container_id: containerId, exitCode: res.StatusCode, "docker container returned" + callback null, res.StatusCode + + destroyContainer: (containerName, containerId, shouldForce, callback = (error) ->) -> + # We want the containerName for the lock and, ideally, the + # containerId to delete. There is a bug in the docker.io module + # where if you delete by name and there is an error, it throws an + # async exception, but if you delete by id it just does a normal + # error callback. We fall back to deleting by name if no id is + # supplied. 
+ LockManager.runWithLock containerName, (releaseLock) -> + DockerRunner._destroyContainer containerId or containerName, shouldForce, releaseLock + , callback + + _destroyContainer: (containerId, shouldForce, callback = (error) ->) -> + logger.log container_id: containerId, "destroying docker container" + container = dockerode.getContainer(containerId) + container.remove {force: shouldForce == true}, (error) -> + if error? and error?.statusCode == 404 + logger.warn err: error, container_id: containerId, "container not found, continuing" + error = null + if error? + logger.error err: error, container_id: containerId, "error destroying container" + else + logger.log container_id: containerId, "destroyed container" + callback(error) + + # handle expiry of docker containers + + MAX_CONTAINER_AGE: Settings.clsi.docker.maxContainerAge or oneHour = 60 * 60 * 1000 + + examineOldContainer: (container, callback = (error, name, id, ttl)->) -> + name = container.Name or container.Names?[0] + created = container.Created * 1000 # creation time is returned in seconds + now = Date.now() + age = now - created + maxAge = DockerRunner.MAX_CONTAINER_AGE + ttl = maxAge - age + logger.log {containerName: name, created: created, now: now, age: age, maxAge: maxAge, ttl: ttl}, "checking whether to destroy container" + callback(null, name, container.Id, ttl) + + destroyOldContainers: (callback = (error) ->) -> + dockerode.listContainers all: true, (error, containers) -> + return callback(error) if error? + jobs = [] + for container in containers or [] + do (container) -> + DockerRunner.examineOldContainer container, (err, name, id, ttl) -> + if name.slice(0, 9) == '/project-' && ttl <= 0 + jobs.push (cb) -> + DockerRunner.destroyContainer name, id, false, () -> cb() + # Ignore errors because some containers get stuck but + # will be destroyed next time + async.series jobs, callback + + startContainerMonitor: () -> + logger.log {maxAge: DockerRunner.MAX_CONTAINER_AGE}, "starting container expiry" + # randomise the start time + randomDelay = Math.floor(Math.random() * 5 * 60 * 1000) + setTimeout () -> + setInterval () -> + DockerRunner.destroyOldContainers() + , oneHour = 60 * 60 * 1000 + , randomDelay + +DockerRunner.startContainerMonitor() \ No newline at end of file diff --git a/app/coffee/LatexRunner.coffee b/app/coffee/LatexRunner.coffee index 6a5a4f6..3571af2 100644 --- a/app/coffee/LatexRunner.coffee +++ b/app/coffee/LatexRunner.coffee @@ -2,7 +2,7 @@ Path = require "path" Settings = require "settings-sharelatex" logger = require "logger-sharelatex" Metrics = require "./Metrics" -CommandRunner = require(Settings.clsi?.commandRunner or "./CommandRunner") +CommandRunner = require "./CommandRunner" ProcessTable = {} # table of currently running jobs (pids or docker container names) diff --git a/app/coffee/LocalCommandRunner.coffee b/app/coffee/LocalCommandRunner.coffee new file mode 100644 index 0000000..f47af00 --- /dev/null +++ b/app/coffee/LocalCommandRunner.coffee @@ -0,0 +1,44 @@ +spawn = require("child_process").spawn +logger = require "logger-sharelatex" + +logger.info "using standard command runner" + +module.exports = CommandRunner = + run: (project_id, command, directory, image, timeout, environment, callback = (error) ->) -> + command = (arg.replace('$COMPILE_DIR', directory) for arg in command) + logger.log project_id: project_id, command: command, directory: directory, "running command" + logger.warn "timeouts and sandboxing are not enabled with CommandRunner" + + # merge environment settings + env = 
{} + env[key] = value for key, value of process.env + env[key] = value for key, value of environment + + # run command as detached process so it has its own process group (which can be killed if needed) + proc = spawn command[0], command.slice(1), stdio: "inherit", cwd: directory, detached: true, env: env + + proc.on "error", (err)-> + logger.err err:err, project_id:project_id, command: command, directory: directory, "error running command" + callback(err) + + proc.on "close", (code, signal) -> + logger.info code:code, signal:signal, project_id:project_id, "command exited" + if signal is 'SIGTERM' # signal from kill method below + err = new Error("terminated") + err.terminated = true + return callback(err) + else if code is 1 # exit status from chktex + err = new Error("exited") + err.code = code + return callback(err) + else + callback() + + return proc.pid # return process id to allow job to be killed if necessary + + kill: (pid, callback = (error) ->) -> + try + process.kill -pid # kill all processes in group + catch err + return callback(err) + callback() diff --git a/config/settings.defaults.coffee b/config/settings.defaults.coffee index c29829e..c76b251 100644 --- a/config/settings.defaults.coffee +++ b/config/settings.defaults.coffee @@ -29,9 +29,9 @@ module.exports = project_cache_length_ms: 1000 * 60 * 60 * 24 parallelFileDownloads:1 -if process.env["COMMAND_RUNNER"] +if process.env["DOCKER_RUNNER"] module.exports.clsi = - commandRunner: process.env["COMMAND_RUNNER"] + dockerRunner: process.env["DOCKER_RUNNER"] == "true" docker: image: process.env["TEXLIVE_IMAGE"] or "quay.io/sharelatex/texlive-full:2017.1" env: diff --git a/docker-compose-config.yml b/docker-compose-config.yml index 6e1fca4..55a3f01 100644 --- a/docker-compose-config.yml +++ b/docker-compose-config.yml @@ -6,12 +6,11 @@ services: TEXLIVE_IMAGE: quay.io/sharelatex/texlive-full:2017.1 TEXLIVE_IMAGE_USER: root SHARELATEX_CONFIG: /app/config/settings.defaults.coffee - COMMAND_RUNNER: docker-runner-sharelatex + DOCKER_RUNNER: "true" COMPILES_HOST_DIR: $PWD/compiles SYNCTEX_BIN_HOST_PATH: $PWD/bin/synctex volumes: - /var/run/docker.sock:/var/run/docker.sock - - ./docker-runner:/app/node_modules/docker-runner-sharelatex - ./compiles:/app/compiles ci: @@ -19,10 +18,9 @@ services: TEXLIVE_IMAGE: quay.io/sharelatex/texlive-full:2017.1 TEXLIVE_IMAGE_USER: root SHARELATEX_CONFIG: /app/config/settings.defaults.coffee - COMMAND_RUNNER: docker-runner-sharelatex + DOCKER_RUNNER: "true" COMPILES_HOST_DIR: $PWD/compiles SYNCTEX_BIN_HOST_PATH: $PWD/bin/synctex volumes: - /var/run/docker.sock:/var/run/docker.sock:rw - - ./docker-runner:/app/node_modules/docker-runner-sharelatex - ./compiles:/app/compiles \ No newline at end of file diff --git a/package.json b/package.json index ae37955..6c5361c 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,7 @@ "compile": "npm run compile:app && npm run compile:test:acceptance && npm run compile:test:smoke", "start": "npm run compile:app && node app.js", "test:acceptance:_run": "mocha --recursive --reporter spec --timeout 15000 --exit $@ test/acceptance/js", - "test:unit:_run": "mocha --recursive --reporter spec $@ test/unit/js", + "test:unit:_run": "mocha --recursive --exit --reporter spec $@ test/unit/js", "test:unit": "npm run compile:app && npm run compile:unit_tests && npm run test:unit:_run -- $@", "compile:unit_tests": "[ -e test/unit ] && coffee -o test/unit/js -c test/unit/coffee || echo 'No unit tests to compile'", "compile:acceptance_tests": "[ -e test/acceptance ] && coffee 
-o test/acceptance/js -c test/acceptance/coffee || echo 'No acceptance tests to compile'", diff --git a/test/acceptance/coffee/SimpleLatexFileTests.coffee b/test/acceptance/coffee/SimpleLatexFileTests.coffee index 0d3337a..95b667b 100644 --- a/test/acceptance/coffee/SimpleLatexFileTests.coffee +++ b/test/acceptance/coffee/SimpleLatexFileTests.coffee @@ -21,7 +21,6 @@ describe "Simple LaTeX file", -> it "should return the PDF", -> pdf = Client.getOutputFile(@body, "pdf") - console.log @body pdf.type.should.equal "pdf" it "should return the log", -> diff --git a/test/acceptance/coffee/TimeoutTests.coffee b/test/acceptance/coffee/TimeoutTests.coffee index 9195474..877223a 100644 --- a/test/acceptance/coffee/TimeoutTests.coffee +++ b/test/acceptance/coffee/TimeoutTests.coffee @@ -23,7 +23,6 @@ describe "Timed out compile", -> Client.compile @project_id, @request, (@error, @res, @body) => done() it "should return a timeout error", -> - console.log @body.compile, "!!!1111" @body.compile.error.should.equal "container timed out" it "should return a timedout status", -> diff --git a/test/acceptance/coffee/helpers/Client.coffee b/test/acceptance/coffee/helpers/Client.coffee index 546c235..3913170 100644 --- a/test/acceptance/coffee/helpers/Client.coffee +++ b/test/acceptance/coffee/helpers/Client.coffee @@ -11,7 +11,6 @@ module.exports = Client = Math.random().toString(16).slice(2) compile: (project_id, data, callback = (error, res, body) ->) -> - console.log("#{@host}/project/#{project_id}/compile") request.post { url: "#{@host}/project/#{project_id}/compile" json: diff --git a/test/unit/coffee/CompileManagerTests.coffee b/test/unit/coffee/CompileManagerTests.coffee index 448e06c..1836c05 100644 --- a/test/unit/coffee/CompileManagerTests.coffee +++ b/test/unit/coffee/CompileManagerTests.coffee @@ -293,8 +293,6 @@ describe "CompileManager", -> # .should.equal true it "should call the callback with the parsed output", -> - console.log(@file_name, @line, @column) - console.log @callback.args[0] @callback .calledWith(null, [{ file: @file_name diff --git a/test/unit/coffee/DockerLockManagerTests.coffee b/test/unit/coffee/DockerLockManagerTests.coffee new file mode 100644 index 0000000..6161bec --- /dev/null +++ b/test/unit/coffee/DockerLockManagerTests.coffee @@ -0,0 +1,145 @@ +SandboxedModule = require('sandboxed-module') +sinon = require('sinon') +require('chai').should() +require "coffee-script" +modulePath = require('path').join __dirname, '../../../app/coffee/DockerLockManager' + +describe "LockManager", -> + beforeEach -> + @LockManager = SandboxedModule.require modulePath, requires: + "settings-sharelatex": @Settings = + clsi: docker: {} + "logger-sharelatex": @logger = { log: sinon.stub(), error: sinon.stub() } + + describe "runWithLock", -> + describe "with a single lock", -> + beforeEach (done) -> + @callback = sinon.stub() + @LockManager.runWithLock "lock-one", (releaseLock) -> + setTimeout () -> + releaseLock(null, "hello", "world") + , 100 + , (err, args...) => + @callback(err,args...) + done() + + it "should call the callback", -> + @callback.calledWith(null,"hello","world").should.equal true + + describe "with two locks", -> + beforeEach (done) -> + @callback1 = sinon.stub() + @callback2 = sinon.stub() + @LockManager.runWithLock "lock-one", (releaseLock) -> + setTimeout () -> + releaseLock(null, "hello", "world","one") + , 100 + , (err, args...) => + @callback1(err,args...) 
+ @LockManager.runWithLock "lock-two", (releaseLock) -> + setTimeout () -> + releaseLock(null, "hello", "world","two") + , 200 + , (err, args...) => + @callback2(err,args...) + done() + + it "should call the first callback", -> + @callback1.calledWith(null,"hello","world","one").should.equal true + + it "should call the second callback", -> + @callback2.calledWith(null,"hello","world","two").should.equal true + + describe "with lock contention", -> + describe "where the first lock is released quickly", -> + beforeEach (done) -> + @LockManager.MAX_LOCK_WAIT_TIME = 1000 + @LockManager.LOCK_TEST_INTERVAL = 100 + @callback1 = sinon.stub() + @callback2 = sinon.stub() + @LockManager.runWithLock "lock", (releaseLock) -> + setTimeout () -> + releaseLock(null, "hello", "world","one") + , 100 + , (err, args...) => + @callback1(err,args...) + @LockManager.runWithLock "lock", (releaseLock) -> + setTimeout () -> + releaseLock(null, "hello", "world","two") + , 200 + , (err, args...) => + @callback2(err,args...) + done() + + it "should call the first callback", -> + @callback1.calledWith(null,"hello","world","one").should.equal true + + it "should call the second callback", -> + @callback2.calledWith(null,"hello","world","two").should.equal true + + describe "where the first lock is held longer than the waiting time", -> + beforeEach (done) -> + @LockManager.MAX_LOCK_HOLD_TIME = 10000 + @LockManager.MAX_LOCK_WAIT_TIME = 1000 + @LockManager.LOCK_TEST_INTERVAL = 100 + @callback1 = sinon.stub() + @callback2 = sinon.stub() + doneOne = doneTwo = false + finish = (key) -> + doneOne = true if key is 1 + doneTwo = true if key is 2 + done() if doneOne and doneTwo + @LockManager.runWithLock "lock", (releaseLock) -> + setTimeout () -> + releaseLock(null, "hello", "world","one") + , 1100 + , (err, args...) => + @callback1(err,args...) + finish(1) + @LockManager.runWithLock "lock", (releaseLock) -> + setTimeout () -> + releaseLock(null, "hello", "world","two") + , 100 + , (err, args...) => + @callback2(err,args...) + finish(2) + + it "should call the first callback", -> + @callback1.calledWith(null,"hello","world","one").should.equal true + + it "should call the second callback with an error", -> + error = sinon.match.instanceOf Error + @callback2.calledWith(error).should.equal true + + describe "where the first lock is held longer than the max holding time", -> + beforeEach (done) -> + @LockManager.MAX_LOCK_HOLD_TIME = 1000 + @LockManager.MAX_LOCK_WAIT_TIME = 2000 + @LockManager.LOCK_TEST_INTERVAL = 100 + @callback1 = sinon.stub() + @callback2 = sinon.stub() + doneOne = doneTwo = false + finish = (key) -> + doneOne = true if key is 1 + doneTwo = true if key is 2 + done() if doneOne and doneTwo + @LockManager.runWithLock "lock", (releaseLock) -> + setTimeout () -> + releaseLock(null, "hello", "world","one") + , 1500 + , (err, args...) => + @callback1(err,args...) + finish(1) + @LockManager.runWithLock "lock", (releaseLock) -> + setTimeout () -> + releaseLock(null, "hello", "world","two") + , 100 + , (err, args...) => + @callback2(err,args...) 
+ finish(2) + + it "should call the first callback", -> + @callback1.calledWith(null,"hello","world","one").should.equal true + + it "should call the second callback", -> + @callback2.calledWith(null,"hello","world","two").should.equal true diff --git a/test/unit/coffee/DockerRunnerTests.coffee b/test/unit/coffee/DockerRunnerTests.coffee new file mode 100644 index 0000000..5a697e2 --- /dev/null +++ b/test/unit/coffee/DockerRunnerTests.coffee @@ -0,0 +1,499 @@ +SandboxedModule = require('sandboxed-module') +sinon = require('sinon') +require('chai').should() +expect = require('chai').expect +require "coffee-script" +modulePath = require('path').join __dirname, '../../../app/coffee/DockerRunner' +Path = require "path" + +describe "DockerRunner", -> + beforeEach -> + @container = container = {} + @DockerRunner = SandboxedModule.require modulePath, requires: + "settings-sharelatex": @Settings = + clsi: docker: {} + path: {} + "logger-sharelatex": @logger = { + log: sinon.stub(), + error: sinon.stub(), + info: sinon.stub(), + warn: sinon.stub() + } + "dockerode": class Docker + getContainer: sinon.stub().returns(container) + createContainer: sinon.stub().yields(null, container) + listContainers: sinon.stub() + "fs": @fs = { stat: sinon.stub().yields(null,{isDirectory:()->true}) } + "./Metrics": + Timer: class Timer + done: () -> + "./LockManager": + runWithLock: (key, runner, callback) -> runner(callback) + @Docker = Docker + @getContainer = Docker::getContainer + @createContainer = Docker::createContainer + @listContainers = Docker::listContainers + + @directory = "/local/compile/directory" + @mainFile = "main-file.tex" + @compiler = "pdflatex" + @image = "example.com/sharelatex/image:2016.2" + @env = {} + @callback = sinon.stub() + @project_id = "project-id-123" + @volumes = + "/local/compile/directory": "/compile" + @Settings.clsi.docker.image = @defaultImage = "default-image" + @Settings.clsi.docker.env = PATH: "mock-path" + + describe "run", -> + beforeEach (done)-> + @DockerRunner._getContainerOptions = sinon.stub().returns(@options = {mockoptions: "foo"}) + @DockerRunner._fingerprintContainer = sinon.stub().returns(@fingerprint = "fingerprint") + + @name = "project-#{@project_id}-#{@fingerprint}" + + @command = ["mock", "command", "--outdir=$COMPILE_DIR"] + @command_with_dir = ["mock", "command", "--outdir=/compile"] + @timeout = 42000 + done() + + describe "successfully", -> + beforeEach (done)-> + @DockerRunner._runAndWaitForContainer = sinon.stub().callsArgWith(3, null, @output = "mock-output") + @DockerRunner.run @project_id, @command, @directory, @image, @timeout, @env, (err, output)=> + @callback(err, output) + done() + + it "should generate the options for the container", -> + @DockerRunner._getContainerOptions + .calledWith(@command_with_dir, @image, @volumes, @timeout) + .should.equal true + + it "should generate the fingerprint from the returned options", -> + @DockerRunner._fingerprintContainer + .calledWith(@options) + .should.equal true + + it "should do the run", -> + @DockerRunner._runAndWaitForContainer + .calledWith(@options, @volumes, @timeout) + .should.equal true + + it "should call the callback", -> + @callback.calledWith(null, @output).should.equal true + + describe 'when path.sandboxedCompilesHostDir is set', -> + + beforeEach -> + @Settings.path.sandboxedCompilesHostDir = '/some/host/dir/compiles' + @directory = '/var/lib/sharelatex/data/compiles/xyz' + @DockerRunner._runAndWaitForContainer = sinon.stub().callsArgWith(3, null, @output = "mock-output") + 
@DockerRunner.run @project_id, @command, @directory, @image, @timeout, @env, @callback
+
+			it 'should re-write the bind directory', ->
+				volumes = @DockerRunner._runAndWaitForContainer.lastCall.args[1]
+				expect(volumes).to.deep.equal {
+					'/some/host/dir/compiles/xyz': '/compile'
+				}
+
+			it "should call the callback", ->
+				@callback.calledWith(null, @output).should.equal true
+
+		describe "when the run throws an error", ->
+			beforeEach ->
+				firstTime = true
+				@output = "mock-output"
+				@DockerRunner._runAndWaitForContainer = (options, volumes, timeout, callback = (error, output)->) =>
+					if firstTime
+						firstTime = false
+						callback new Error("HTTP code is 500 which indicates error: server error")
+					else
+						callback(null, @output)
+				sinon.spy @DockerRunner, "_runAndWaitForContainer"
+				@DockerRunner.destroyContainer = sinon.stub().callsArg(3)
+				@DockerRunner.run @project_id, @command, @directory, @image, @timeout, @env, @callback
+
+			it "should do the run twice", ->
+				@DockerRunner._runAndWaitForContainer
+					.calledTwice.should.equal true
+
+			it "should destroy the container in between", ->
+				@DockerRunner.destroyContainer
+					.calledWith(@name, null)
+					.should.equal true
+
+			it "should call the callback", ->
+				@callback.calledWith(null, @output).should.equal true
+
+		describe "with no image", ->
+			beforeEach ->
+				@DockerRunner._runAndWaitForContainer = sinon.stub().callsArgWith(3, null, @output = "mock-output")
+				@DockerRunner.run @project_id, @command, @directory, null, @timeout, @env, @callback
+
+			it "should use the default image", ->
+				@DockerRunner._getContainerOptions
+					.calledWith(@command_with_dir, @defaultImage, @volumes, @timeout)
+					.should.equal true
+
+	describe "_runAndWaitForContainer", ->
+		beforeEach ->
+			@options = {mockoptions: "foo", name: @name = "mock-name"}
+			@DockerRunner.startContainer = (options, volumes, attachStreamHandler, callback) =>
+				attachStreamHandler(null, @output = "mock-output")
+				callback(null, @containerId = "container-id")
+			sinon.spy @DockerRunner, "startContainer"
+			@DockerRunner.waitForContainer = sinon.stub().callsArgWith(2, null, @exitCode = 42)
+			@DockerRunner._runAndWaitForContainer @options, @volumes, @timeout, @callback
+
+		it "should create/start the container", ->
+			@DockerRunner.startContainer
+				.calledWith(@options, @volumes)
+				.should.equal true
+
+		it "should wait for the container to finish", ->
+			@DockerRunner.waitForContainer
+				.calledWith(@name, @timeout)
+				.should.equal true
+
+		it "should call the callback with the output", ->
+			@callback.calledWith(null, @output).should.equal true
+
+	describe "startContainer", ->
+		beforeEach ->
+			@attachStreamHandler = sinon.stub()
+			@options = {mockoptions: "foo", name: "mock-name"}
+			@container.inspect = sinon.stub().callsArgWith(0)
+			@DockerRunner.attachToContainer = (containerId, attachStreamHandler, cb)=>
+				attachStreamHandler()
+				cb()
+			sinon.spy @DockerRunner, "attachToContainer"
+
+		describe "when the container exists", ->
+			beforeEach ->
+				@container.inspect = sinon.stub().callsArgWith(0)
+				@container.start = sinon.stub().yields()
+
+				@DockerRunner.startContainer @options, @volumes, @callback, ->
+
+			it "should start the container with the given name", ->
+				@getContainer
+					.calledWith(@options.name)
+					.should.equal true
+				@container.start
+					.called
+					.should.equal true
+
+			it "should not try to create the container", ->
+				@createContainer.called.should.equal false
+
+			it "should attach to the container", ->
+
@DockerRunner.attachToContainer.called.should.equal true + + it "should call the callback", -> + @callback.called.should.equal true + + it "should attach before the container starts", -> + sinon.assert.callOrder(@DockerRunner.attachToContainer, @container.start) + + describe "when the container does not exist", -> + beforeEach ()-> + exists = false + @container.start = sinon.stub().yields() + @container.inspect = sinon.stub().callsArgWith(0, {statusCode:404}) + @DockerRunner.startContainer @options, @volumes, @attachStreamHandler, @callback + + it "should create the container", -> + @createContainer + .calledWith(@options) + .should.equal true + + it "should call the callback and stream handler", -> + @attachStreamHandler.called.should.equal true + @callback.called.should.equal true + + it "should attach to the container", -> + @DockerRunner.attachToContainer.called.should.equal true + + it "should attach before the container starts", -> + sinon.assert.callOrder(@DockerRunner.attachToContainer, @container.start) + + + describe "when the container is already running", -> + beforeEach -> + error = new Error("HTTP code is 304 which indicates error: server error - start: Cannot start container #{@name}: The container MOCKID is already running.") + error.statusCode = 304 + @container.start = sinon.stub().yields(error) + @container.inspect = sinon.stub().callsArgWith(0) + @DockerRunner.startContainer @options, @volumes, @attachStreamHandler, @callback + + it "should not try to create the container", -> + @createContainer.called.should.equal false + + it "should call the callback and stream handler without an error", -> + @attachStreamHandler.called.should.equal true + @callback.called.should.equal true + + describe "when a volume does not exist", -> + beforeEach ()-> + @fs.stat = sinon.stub().yields(new Error("no such path")) + @DockerRunner.startContainer @options, @volumes, @attachStreamHandler, @callback + + it "should not try to create the container", -> + @createContainer.called.should.equal false + + it "should call the callback with an error", -> + @callback.calledWith(new Error()).should.equal true + + describe "when a volume exists but is not a directory", -> + beforeEach -> + @fs.stat = sinon.stub().yields(null, {isDirectory: () -> return false}) + @DockerRunner.startContainer @options, @volumes, @attachStreamHandler, @callback + + it "should not try to create the container", -> + @createContainer.called.should.equal false + + it "should call the callback with an error", -> + @callback.calledWith(new Error()).should.equal true + + describe "when a volume does not exist, but sibling-containers are used", -> + beforeEach -> + @fs.stat = sinon.stub().yields(new Error("no such path")) + @Settings.path.sandboxedCompilesHostDir = '/some/path' + @container.start = sinon.stub().yields() + @DockerRunner.startContainer @options, @volumes, @callback + + afterEach -> + delete @Settings.path.sandboxedCompilesHostDir + + it "should start the container with the given name", -> + @getContainer + .calledWith(@options.name) + .should.equal true + @container.start + .called + .should.equal true + + it "should not try to create the container", -> + @createContainer.called.should.equal false + + it "should call the callback", -> + @callback.called.should.equal true + @callback.calledWith(new Error()).should.equal false + + describe "when the container tries to be created, but already has been (race condition)", -> + + describe "waitForContainer", -> + beforeEach -> + @containerId = "container-id" + 
@timeout = 5000 + @container.wait = sinon.stub().yields(null, StatusCode: @statusCode = 42) + @container.kill = sinon.stub().yields() + + describe "when the container returns in time", -> + beforeEach -> + @DockerRunner.waitForContainer @containerId, @timeout, @callback + + it "should wait for the container", -> + @getContainer + .calledWith(@containerId) + .should.equal true + @container.wait + .called + .should.equal true + + it "should call the callback with the exit", -> + @callback + .calledWith(null, @statusCode) + .should.equal true + + describe "when the container does not return before the timeout", -> + beforeEach (done) -> + @container.wait = (callback = (error, exitCode) ->) -> + setTimeout () -> + callback(null, StatusCode: 42) + , 100 + @timeout = 5 + @DockerRunner.waitForContainer @containerId, @timeout, (args...) => + @callback(args...) + done() + + it "should call kill on the container", -> + @getContainer + .calledWith(@containerId) + .should.equal true + @container.kill + .called + .should.equal true + + it "should call the callback with an error", -> + error = new Error("container timed out") + error.timedout = true + @callback + .calledWith(error) + .should.equal true + + describe "destroyOldContainers", -> + beforeEach (done) -> + oneHourInSeconds = 60 * 60 + oneHourInMilliseconds = oneHourInSeconds * 1000 + nowInSeconds = Date.now()/1000 + @containers = [{ + Name: "/project-old-container-name" + Id: "old-container-id" + Created: nowInSeconds - oneHourInSeconds - 100 + }, { + Name: "/project-new-container-name" + Id: "new-container-id" + Created: nowInSeconds - oneHourInSeconds + 100 + }, { + Name: "/totally-not-a-project-container" + Id: "some-random-id" + Created: nowInSeconds - (2 * oneHourInSeconds ) + }] + @DockerRunner.MAX_CONTAINER_AGE = oneHourInMilliseconds + @listContainers.callsArgWith(1, null, @containers) + @DockerRunner.destroyContainer = sinon.stub().callsArg(3) + @DockerRunner.destroyOldContainers (error) => + @callback(error) + done() + + it "should list all containers", -> + @listContainers + .calledWith(all: true) + .should.equal true + + it "should destroy old containers", -> + @DockerRunner.destroyContainer + .callCount + .should.equal 1 + @DockerRunner.destroyContainer + .calledWith("/project-old-container-name", "old-container-id") + .should.equal true + + it "should not destroy new containers", -> + @DockerRunner.destroyContainer + .calledWith("/project-new-container-name", "new-container-id") + .should.equal false + + it "should not destroy non-project containers", -> + @DockerRunner.destroyContainer + .calledWith("/totally-not-a-project-container", "some-random-id") + .should.equal false + + it "should callback the callback", -> + @callback.called.should.equal true + + + describe '_destroyContainer', -> + beforeEach -> + @containerId = 'some_id' + @fakeContainer = + remove: sinon.stub().callsArgWith(1, null) + @Docker::getContainer = sinon.stub().returns(@fakeContainer) + + it 'should get the container', (done) -> + @DockerRunner._destroyContainer @containerId, false, (err) => + @Docker::getContainer.callCount.should.equal 1 + @Docker::getContainer.calledWith(@containerId).should.equal true + done() + + it 'should try to force-destroy the container when shouldForce=true', (done) -> + @DockerRunner._destroyContainer @containerId, true, (err) => + @fakeContainer.remove.callCount.should.equal 1 + @fakeContainer.remove.calledWith({force: true}).should.equal true + done() + + it 'should not try to force-destroy the container when 
shouldForce=false', (done) -> + @DockerRunner._destroyContainer @containerId, false, (err) => + @fakeContainer.remove.callCount.should.equal 1 + @fakeContainer.remove.calledWith({force: false}).should.equal true + done() + + it 'should not produce an error', (done) -> + @DockerRunner._destroyContainer @containerId, false, (err) => + expect(err).to.equal null + done() + + describe 'when the container is already gone', -> + beforeEach -> + @fakeError = new Error('woops') + @fakeError.statusCode = 404 + @fakeContainer = + remove: sinon.stub().callsArgWith(1, @fakeError) + @Docker::getContainer = sinon.stub().returns(@fakeContainer) + + it 'should not produce an error', (done) -> + @DockerRunner._destroyContainer @containerId, false, (err) => + expect(err).to.equal null + done() + + describe 'when container.destroy produces an error', (done) -> + beforeEach -> + @fakeError = new Error('woops') + @fakeError.statusCode = 500 + @fakeContainer = + remove: sinon.stub().callsArgWith(1, @fakeError) + @Docker::getContainer = sinon.stub().returns(@fakeContainer) + + it 'should produce an error', (done) -> + @DockerRunner._destroyContainer @containerId, false, (err) => + expect(err).to.not.equal null + expect(err).to.equal @fakeError + done() + + + describe 'kill', -> + beforeEach -> + @containerId = 'some_id' + @fakeContainer = + kill: sinon.stub().callsArgWith(0, null) + @Docker::getContainer = sinon.stub().returns(@fakeContainer) + + it 'should get the container', (done) -> + @DockerRunner.kill @containerId, (err) => + @Docker::getContainer.callCount.should.equal 1 + @Docker::getContainer.calledWith(@containerId).should.equal true + done() + + it 'should try to force-destroy the container', (done) -> + @DockerRunner.kill @containerId, (err) => + @fakeContainer.kill.callCount.should.equal 1 + done() + + it 'should not produce an error', (done) -> + @DockerRunner.kill @containerId, (err) => + expect(err).to.equal undefined + done() + + describe 'when the container is not actually running', -> + beforeEach -> + @fakeError = new Error('woops') + @fakeError.statusCode = 500 + @fakeError.message = 'Cannot kill container is not running' + @fakeContainer = + kill: sinon.stub().callsArgWith(0, @fakeError) + @Docker::getContainer = sinon.stub().returns(@fakeContainer) + + it 'should not produce an error', (done) -> + @DockerRunner.kill @containerId, (err) => + expect(err).to.equal undefined + done() + + describe 'when container.kill produces a legitimate error', (done) -> + beforeEach -> + @fakeError = new Error('woops') + @fakeError.statusCode = 500 + @fakeError.message = 'Totally legitimate reason to throw an error' + @fakeContainer = + kill: sinon.stub().callsArgWith(0, @fakeError) + @Docker::getContainer = sinon.stub().returns(@fakeContainer) + + it 'should produce an error', (done) -> + @DockerRunner.kill @containerId, (err) => + expect(err).to.not.equal undefined + expect(err).to.equal @fakeError + done() diff --git a/test/unit/coffee/LockManager.coffee b/test/unit/coffee/LockManagerTests.coffee similarity index 98% rename from test/unit/coffee/LockManager.coffee rename to test/unit/coffee/LockManagerTests.coffee index c1071a5..2d0c95a 100644 --- a/test/unit/coffee/LockManager.coffee +++ b/test/unit/coffee/LockManagerTests.coffee @@ -5,7 +5,7 @@ modulePath = require('path').join __dirname, '../../../app/js/LockManager' Path = require "path" Errors = require "../../../app/js/Errors" -describe "LockManager", -> +describe "DockerLockManager", -> beforeEach -> @LockManager = SandboxedModule.require 
modulePath, requires: "settings-sharelatex": {}
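
Note: with this patch the runner is selected once, at require time. Setting DOCKER_RUNNER="true" makes settings.defaults.coffee set Settings.clsi.dockerRunner to true, so CommandRunner.coffee resolves to ./DockerRunner; anything else falls back to ./LocalCommandRunner. Both runners expose the same run/kill interface, which is why LatexRunner and CompileManager can simply require ./CommandRunner. A rough usage sketch, assuming the compiled module lives under app/js; the project id and compile directory below are made up:

	CommandRunner = require "./app/js/CommandRunner"

	# $COMPILE_DIR is replaced with the compile directory itself by the local
	# runner, and with /compile (the container bind mount) by the docker runner
	command = ["pdflatex", "-interaction=batchmode", "$COMPILE_DIR/main.tex"]

	handle = CommandRunner.run "project-id-123", command, "/app/compiles/project-id-123", "quay.io/sharelatex/texlive-full:2017.1", 60000, {}, (error, output) ->
		# the docker runner reports output as {stdout, stderr}; the local
		# runner inherits stdio and only reports errors
		console.log error, output

	# run() returns a pid (local runner) or a container name (docker runner);
	# either value can later be passed to CommandRunner.kill to stop the job:
	#   CommandRunner.kill handle, (error) ->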