diff --git a/app.coffee b/app.coffee index 44d3157..9ac1d1d 100644 --- a/app.coffee +++ b/app.coffee @@ -12,6 +12,7 @@ Metrics.initialize("clsi") Metrics.open_sockets.monitor(logger) ProjectPersistenceManager = require "./app/js/ProjectPersistenceManager" +OutputCacheManager = require "./app/js/OutputCacheManager" require("./app/js/db").sync() @@ -36,7 +37,12 @@ app.delete "/project/:project_id", CompileController.clearCache app.get "/project/:project_id/sync/code", CompileController.syncFromCode app.get "/project/:project_id/sync/pdf", CompileController.syncFromPdf -staticServer = express.static Settings.path.compilesDir, setHeaders: (res, path, stat) -> +ForbidSymlinks = require "./app/js/StaticServerForbidSymlinks" + +# create a static server which does not allow access to any symlinks +# avoids possible mismatch of root directory between middleware check +# and serving the files +staticServer = ForbidSymlinks express.static, Settings.path.compilesDir, setHeaders: (res, path, stat) -> if Path.basename(path) == "output.pdf" res.set("Content-Type", "application/pdf") # Calculate an etag in the same way as nginx @@ -50,8 +56,12 @@ staticServer = express.static Settings.path.compilesDir, setHeaders: (res, path, # that could be used in same-origin/XSS attacks. res.set("Content-Type", "text/plain") -app.get "/project/:project_id/output/*", require("./app/js/SymlinkCheckerMiddlewear"), (req, res, next) -> - req.url = "/#{req.params.project_id}/#{req.params[0]}" +app.get "/project/:project_id/output/*", (req, res, next) -> + if req.query?.build? && req.query.build.match(OutputCacheManager.BUILD_REGEX) + # for specific build get the path from the OutputCacheManager (e.g. .clsi/buildId) + req.url = "/#{req.params.project_id}/" + OutputCacheManager.path(req.query.build, "/#{req.params[0]}") + else + req.url = "/#{req.params.project_id}/#{req.params[0]}" staticServer(req, res, next) app.get "/status", (req, res, next) -> diff --git a/app/coffee/CompileController.coffee b/app/coffee/CompileController.coffee index c738202..29373c3 100644 --- a/app/coffee/CompileController.coffee +++ b/app/coffee/CompileController.coffee @@ -35,6 +35,7 @@ module.exports = CompileController = outputFiles: outputFiles.map (file) -> url: "#{Settings.apis.clsi.url}/project/#{request.project_id}/output/#{file.path}" type: file.type + build: file.build } clearCache: (req, res, next = (error) ->) -> diff --git a/app/coffee/CompileManager.coffee b/app/coffee/CompileManager.coffee index 46e2c0f..ac0c241 100644 --- a/app/coffee/CompileManager.coffee +++ b/app/coffee/CompileManager.coffee @@ -1,6 +1,7 @@ ResourceWriter = require "./ResourceWriter" LatexRunner = require "./LatexRunner" OutputFileFinder = require "./OutputFileFinder" +OutputCacheManager = require "./OutputCacheManager" Settings = require("settings-sharelatex") Path = require "path" logger = require "logger-sharelatex" @@ -32,7 +33,8 @@ module.exports = CompileManager = OutputFileFinder.findOutputFiles request.resources, compileDir, (error, outputFiles) -> return callback(error) if error? - callback null, outputFiles + OutputCacheManager.saveOutputFiles outputFiles, compileDir, (error, newOutputFiles) -> + callback null, newOutputFiles clearProject: (project_id, _callback = (error) ->) -> callback = (error) -> diff --git a/app/coffee/OutputCacheManager.coffee b/app/coffee/OutputCacheManager.coffee new file mode 100644 index 0000000..66abc4e --- /dev/null +++ b/app/coffee/OutputCacheManager.coffee @@ -0,0 +1,113 @@ +async = require "async" +fs = require "fs" +fse = require "fs-extra" +Path = require "path" +logger = require "logger-sharelatex" +_ = require "underscore" + +OutputFileOptimiser = require "./OutputFileOptimiser" + +module.exports = OutputCacheManager = + CACHE_SUBDIR: '.cache/clsi' + BUILD_REGEX: /^[0-9a-f]+$/ # build id is Date.now() converted to hex + CACHE_LIMIT: 2 # maximum number of cache directories + CACHE_AGE: 60*60*1000 # up to one hour old + + path: (buildId, file) -> + # used by static server, given build id return '.cache/clsi/buildId' + if buildId.match OutputCacheManager.BUILD_REGEX + return Path.join(OutputCacheManager.CACHE_SUBDIR, buildId, file) + else + # for invalid build id, return top level + return file + + saveOutputFiles: (outputFiles, compileDir, callback = (error) ->) -> + # make a compileDir/CACHE_SUBDIR/build_id directory and + # copy all the output files into it + cacheRoot = Path.join(compileDir, OutputCacheManager.CACHE_SUBDIR) + # Put the files into a new cache subdirectory + buildId = Date.now().toString(16) + cacheDir = Path.join(compileDir, OutputCacheManager.CACHE_SUBDIR, buildId) + # let file expiry run in the background + OutputCacheManager.expireOutputFiles cacheRoot, {keep: buildId} + + checkFile = (src, callback) -> + # check if we have a valid file to copy into the cache + fs.stat src, (err, stats) -> + if err? + # some problem reading the file + logger.error err: err, file: src, "stat error for file in cache" + callback(err) + else if not stats.isFile() + # other filetype - reject it + logger.error err: err, src: src, dst: dst, stat: stats, "nonfile output - refusing to copy to cache" + callback(new Error("output file is not a file"), file) + else + # it's a plain file, ok to copy + callback(null) + + copyFile = (src, dst, callback) -> + # copy output file into the cache + fse.copy src, dst, (err) -> + if err? + logger.error err: err, src: src, dst: dst, "copy error for file in cache" + callback(err) + else + # call the optimiser for the file too + OutputFileOptimiser.optimiseFile src, dst, callback + + # make the new cache directory + fse.ensureDir cacheDir, (err) -> + if err? + logger.error err: err, directory: cacheDir, "error creating cache directory" + callback(err, outputFiles) + else + # copy all the output files into the new cache directory + async.mapSeries outputFiles, (file, cb) -> + newFile = _.clone(file) + [src, dst] = [Path.join(compileDir, file.path), Path.join(cacheDir, file.path)] + checkFile src, (err) -> + copyFile src, dst, (err) -> + if not err? + newFile.build = buildId # attach a build id if we cached the file + cb(err, newFile) + , (err, results) -> + if err? + # pass back the original files if we encountered *any* error + callback(err, outputFiles) + else + # pass back the list of new files in the cache + callback(err, results) + + expireOutputFiles: (cacheRoot, options, callback = (error) ->) -> + # look in compileDir for build dirs and delete if > N or age of mod time > T + fs.readdir cacheRoot, (err, results) -> + if err? + return callback(null) if err.code == 'ENOENT' # cache directory is empty + logger.error err: err, project_id: cacheRoot, "error clearing cache" + return callback(err) + + dirs = results.sort().reverse() + currentTime = Date.now() + + isExpired = (dir, index) -> + return false if options?.keep == dir + # remove any directories over the hard limit + return true if index > OutputCacheManager.CACHE_LIMIT + # we can get the build time from the directory name + dirTime = parseInt(dir, 16) + age = currentTime - dirTime + return age > OutputCacheManager.CACHE_AGE + + toRemove = _.filter(dirs, isExpired) + + removeDir = (dir, cb) -> + fse.remove Path.join(cacheRoot, dir), (err, result) -> + logger.log cache: cacheRoot, dir: dir, "removed expired cache dir" + if err? + logger.error err: err, dir: dir, "cache remove error" + cb(err, result) + + async.eachSeries toRemove, (dir, cb) -> + removeDir dir, cb + , callback diff --git a/app/coffee/OutputFileFinder.coffee b/app/coffee/OutputFileFinder.coffee index fbfd38e..26f07cb 100644 --- a/app/coffee/OutputFileFinder.coffee +++ b/app/coffee/OutputFileFinder.coffee @@ -37,7 +37,7 @@ module.exports = OutputFileFinder = _callback(error, fileList) _callback = () -> - args = [directory, "-type", "f"] + args = [directory, "-name", ".cache", "-prune", "-o", "-type", "f", "-print"] logger.log args: args, "running find command" proc = spawn("find", args) diff --git a/app/coffee/OutputFileOptimiser.coffee b/app/coffee/OutputFileOptimiser.coffee new file mode 100644 index 0000000..d0c091d --- /dev/null +++ b/app/coffee/OutputFileOptimiser.coffee @@ -0,0 +1,37 @@ +fs = require "fs" +Path = require "path" +spawn = require("child_process").spawn +logger = require "logger-sharelatex" +_ = require "underscore" + +module.exports = OutputFileOptimiser = + + optimiseFile: (src, dst, callback = (error) ->) -> + # check output file (src) and see if we can optimise it, storing + # the result in the build directory (dst) + if src.match(/\.pdf$/) + OutputFileOptimiser.optimisePDF src, dst, callback + else + callback (null) + + optimisePDF: (src, dst, callback = (error) ->) -> + tmpOutput = dst + '.opt' + args = ["--linearize", src, tmpOutput] + logger.log args: args, "running qpdf command" + + proc = spawn("qpdf", args) + stdout = "" + proc.stdout.on "data", (chunk) -> + stdout += chunk.toString() + callback = _.once(callback) # avoid double call back for error and close event + proc.on "error", (err) -> + logger.warn {err, args}, "qpdf failed" + callback(null) # ignore the error + proc.on "close", (code) -> + if code != 0 + logger.warn {code, args}, "qpdf returned error" + return callback(null) # ignore the error + fs.rename tmpOutput, dst, (err) -> + if err? + logger.warn {tmpOutput, dst}, "failed to rename output of qpdf command" + callback(null) # ignore the error diff --git a/app/coffee/StaticServerForbidSymlinks.coffee b/app/coffee/StaticServerForbidSymlinks.coffee new file mode 100644 index 0000000..2f48190 --- /dev/null +++ b/app/coffee/StaticServerForbidSymlinks.coffee @@ -0,0 +1,24 @@ +Path = require("path") +fs = require("fs") +Settings = require("settings-sharelatex") +logger = require("logger-sharelatex") +url = require "url" + +module.exports = ForbidSymlinks = (staticFn, root, options) -> + expressStatic = staticFn root, options + basePath = Path.resolve(root) + return (req, res, next) -> + path = url.parse(req.url)?.pathname + requestedFsPath = Path.normalize("#{basePath}/#{path}") + fs.realpath requestedFsPath, (err, realFsPath)-> + if err? + logger.warn err:err, requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "error checking file access" + if err.code == 'ENOENT' + return res.sendStatus(404) + else + return res.sendStatus(500) + else if requestedFsPath != realFsPath + logger.warn requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "trying to access a different file (symlink), aborting" + return res.sendStatus(404) + else + expressStatic(req, res, next) diff --git a/app/coffee/SymlinkCheckerMiddlewear.coffee b/app/coffee/SymlinkCheckerMiddlewear.coffee deleted file mode 100644 index b7baf1f..0000000 --- a/app/coffee/SymlinkCheckerMiddlewear.coffee +++ /dev/null @@ -1,17 +0,0 @@ -Path = require("path") -fs = require("fs") -Settings = require("settings-sharelatex") -logger = require("logger-sharelatex") - - -module.exports = (req, res, next)-> - basePath = Path.resolve("#{Settings.path.compilesDir}/#{req.params.project_id}") - requestedFsPath = Path.normalize("#{basePath}/#{req.params[0]}") - fs.realpath requestedFsPath, (err, realFsPath)-> - if err? - return res.send(500) - else if requestedFsPath != realFsPath - logger.warn requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "trying to access a different file (symlink), aborting" - return res.send(404) - else - return next() diff --git a/package.json b/package.json index 70283c6..678d971 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,9 @@ "smoke-test-sharelatex": "git+https://github.com/sharelatex/smoke-test-sharelatex.git#v1.0.0", "sqlite3": "~2.2.0", "express": "^4.2.0", - "body-parser": "^1.2.0" + "body-parser": "^1.2.0", + "fs-extra": "^0.16.3", + "underscore": "^1.8.2" }, "devDependencies": { "mocha": "1.10.0", diff --git a/test/unit/coffee/CompileControllerTests.coffee b/test/unit/coffee/CompileControllerTests.coffee index d2babf7..bc2d988 100644 --- a/test/unit/coffee/CompileControllerTests.coffee +++ b/test/unit/coffee/CompileControllerTests.coffee @@ -36,9 +36,11 @@ describe "CompileController", -> @output_files = [{ path: "output.pdf" type: "pdf" + build: 1234 }, { path: "output.log" type: "log" + build: 1234 }] @RequestParser.parse = sinon.stub().callsArgWith(1, null, @request) @ProjectPersistenceManager.markProjectAsJustAccessed = sinon.stub().callsArg(1) @@ -73,6 +75,7 @@ describe "CompileController", -> outputFiles: @output_files.map (file) => url: "#{@Settings.apis.clsi.url}/project/#{@project_id}/output/#{file.path}" type: file.type + build: file.build ) .should.equal true diff --git a/test/unit/coffee/CompileManagerTests.coffee b/test/unit/coffee/CompileManagerTests.coffee index 5d110eb..8923dea 100644 --- a/test/unit/coffee/CompileManagerTests.coffee +++ b/test/unit/coffee/CompileManagerTests.coffee @@ -12,6 +12,7 @@ describe "CompileManager", -> "./LatexRunner": @LatexRunner = {} "./ResourceWriter": @ResourceWriter = {} "./OutputFileFinder": @OutputFileFinder = {} + "./OutputCacheManager": @OutputCacheManager = {} "settings-sharelatex": @Settings = { path: compilesDir: "/compiles/dir" } "logger-sharelatex": @logger = { log: sinon.stub() } "child_process": @child_process = {} @@ -26,6 +27,15 @@ describe "CompileManager", -> path: "output.pdf" type: "pdf" }] + @build_files = [{ + path: "output.log" + type: "log" + build: 1234 + }, { + path: "output.pdf" + type: "pdf" + build: 1234 + }] @request = resources: @resources = "mock-resources" rootResourcePath: @rootResourcePath = "main.tex" @@ -37,6 +47,7 @@ describe "CompileManager", -> @ResourceWriter.syncResourcesToDisk = sinon.stub().callsArg(3) @LatexRunner.runLatex = sinon.stub().callsArg(2) @OutputFileFinder.findOutputFiles = sinon.stub().callsArgWith(2, null, @output_files) + @OutputCacheManager.saveOutputFiles = sinon.stub().callsArgWith(2, null, @build_files) @CompileManager.doCompile @request, @callback it "should write the resources to disk", -> @@ -60,7 +71,8 @@ describe "CompileManager", -> .should.equal true it "should return the output files", -> - @callback.calledWith(null, @output_files).should.equal true + console.log 'output_files', @build_files + @callback.calledWith(null, @build_files).should.equal true describe "clearProject", -> describe "succesfully", -> diff --git a/test/unit/coffee/StaticServerForbidSymlinksTests.coffee b/test/unit/coffee/StaticServerForbidSymlinksTests.coffee new file mode 100644 index 0000000..e17bace --- /dev/null +++ b/test/unit/coffee/StaticServerForbidSymlinksTests.coffee @@ -0,0 +1,82 @@ +should = require('chai').should() +SandboxedModule = require('sandboxed-module') +assert = require('assert') +path = require('path') +sinon = require('sinon') +modulePath = path.join __dirname, "../../../app/js/StaticServerForbidSymlinks" +expect = require("chai").expect + +describe "StaticServerForbidSymlinks", -> + + beforeEach -> + + @settings = + path: + compilesDir: "/compiles/here" + + @fs = {} + @ForbidSymlinks = SandboxedModule.require modulePath, requires: + "settings-sharelatex":@settings + "logger-sharelatex": + log:-> + warn:-> + "fs":@fs + + @dummyStatic = (rootDir, options) -> + return (req, res, next) -> + # console.log "dummyStatic serving file", rootDir, "called with", req.url + # serve it + next() + + @StaticServerForbidSymlinks = @ForbidSymlinks @dummyStatic, @settings.path.compilesDir + @req = + params: + project_id:"12345" + + @res = {} + @req.url = "/12345/output.pdf" + + + describe "sending a normal file through", -> + beforeEach -> + @fs.realpath = sinon.stub().callsArgWith(1, null, "#{@settings.path.compilesDir}/#{@req.params.project_id}/output.pdf") + + it "should call next", (done)-> + @res.sendStatus = (resCode)-> + resCode.should.equal 200 + done() + @StaticServerForbidSymlinks @req, @res, done + + + describe "with a missing file", -> + beforeEach -> + @fs.realpath = sinon.stub().callsArgWith(1, {code: 'ENOENT'}, "#{@settings.path.compilesDir}/#{@req.params.project_id}/unknown.pdf") + + it "should send a 404", (done)-> + @res.sendStatus = (resCode)-> + resCode.should.equal 404 + done() + @StaticServerForbidSymlinks @req, @res + + + describe "with a symlink file", -> + beforeEach -> + @fs.realpath = sinon.stub().callsArgWith(1, null, "/etc/#{@req.params.project_id}/output.pdf") + + it "should send a 404", (done)-> + @res.sendStatus = (resCode)-> + resCode.should.equal 404 + done() + @StaticServerForbidSymlinks @req, @res + + describe "with an error from fs.realpath", -> + + beforeEach -> + @fs.realpath = sinon.stub().callsArgWith(1, "error") + + it "should send a 500", (done)-> + @res.sendStatus = (resCode)-> + resCode.should.equal 500 + done() + @StaticServerForbidSymlinks @req, @res + diff --git a/test/unit/coffee/SymlinkCheckerMiddlewearTests.coffee b/test/unit/coffee/SymlinkCheckerMiddlewearTests.coffee deleted file mode 100644 index 50bd427..0000000 --- a/test/unit/coffee/SymlinkCheckerMiddlewearTests.coffee +++ /dev/null @@ -1,60 +0,0 @@ -should = require('chai').should() -SandboxedModule = require('sandboxed-module') -assert = require('assert') -path = require('path') -sinon = require('sinon') -modulePath = path.join __dirname, "../../../app/js/SymlinkCheckerMiddlewear" -expect = require("chai").expect - -describe "SymlinkCheckerMiddlewear", -> - - beforeEach -> - - @settings = - path: - compilesDir: "/compiles/here" - - @fs = {} - @SymlinkCheckerMiddlewear = SandboxedModule.require modulePath, requires: - "settings-sharelatex":@settings - "logger-sharelatex": - log:-> - warn:-> - "fs":@fs - @req = - params: - project_id:"12345" - - @res = {} - @req.params[0]= "output.pdf" - - - describe "sending a normal file through", -> - beforeEach -> - @fs.realpath = sinon.stub().callsArgWith(1, null, "#{@settings.path.compilesDir}/#{@req.params.project_id}/output.pdf") - - it "should call next", (done)-> - @SymlinkCheckerMiddlewear @req, @res, done - - - describe "with a symlink file", -> - beforeEach -> - @fs.realpath = sinon.stub().callsArgWith(1, null, "/etc/#{@req.params.project_id}/output.pdf") - - it "should send a 404", (done)-> - @res.send = (resCode)-> - resCode.should.equal 404 - done() - @SymlinkCheckerMiddlewear @req, @res - - describe "with an error from fs.realpath", -> - - beforeEach -> - @fs.realpath = sinon.stub().callsArgWith(1, "error") - - it "should send a 500", (done)-> - @res.send = (resCode)-> - resCode.should.equal 500 - done() - @SymlinkCheckerMiddlewear @req, @res -