From 1923352e66f103bd9f9304e8fde921fff9afd958 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Tue, 24 Feb 2015 14:40:05 +0000 Subject: [PATCH 01/14] save output files in a .cache directory --- app/coffee/CompileManager.coffee | 4 ++- app/coffee/OutputCacheManager.coffee | 49 ++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 app/coffee/OutputCacheManager.coffee diff --git a/app/coffee/CompileManager.coffee b/app/coffee/CompileManager.coffee index 46e2c0f..dbdb39f 100644 --- a/app/coffee/CompileManager.coffee +++ b/app/coffee/CompileManager.coffee @@ -1,6 +1,7 @@ ResourceWriter = require "./ResourceWriter" LatexRunner = require "./LatexRunner" OutputFileFinder = require "./OutputFileFinder" +OutputCacheManager = require "./OutputCacheManager" Settings = require("settings-sharelatex") Path = require "path" logger = require "logger-sharelatex" @@ -32,7 +33,8 @@ module.exports = CompileManager = OutputFileFinder.findOutputFiles request.resources, compileDir, (error, outputFiles) -> return callback(error) if error? - callback null, outputFiles + OutputCacheManager.saveOutputFiles outputFiles, compileDir, (error, newOutputFiles) -> + callback null, newOutputFiles clearProject: (project_id, _callback = (error) ->) -> callback = (error) -> diff --git a/app/coffee/OutputCacheManager.coffee b/app/coffee/OutputCacheManager.coffee new file mode 100644 index 0000000..0f96b11 --- /dev/null +++ b/app/coffee/OutputCacheManager.coffee @@ -0,0 +1,49 @@ +async = require "async" +fs = require "fs" +fse = require "fs-extra" +Path = require "path" +logger = require "logger-sharelatex" +_ = require "underscore" + +module.exports = OutputCacheManager = + CACHE_DIR: '.cache/clsi' + + saveOutputFiles: (outputFiles, target, callback) -> + # make a target/build_id directory and + # copy all the output files into it + buildId = 'build-' + Date.now() + relDir = OutputCacheManager.CACHE_DIR + '/' + buildId + newDir = target + '/' + relDir + OutputCacheManager.expireOutputFiles target + fse.ensureDir newDir, (err) -> + if err? + callback(err, outputFiles) + else + async.mapSeries outputFiles, (file, cb) -> + newFile = _.clone(file) + newFile.path = relDir + '/' + file.path + src = target + '/' + file.path + dst = target + '/' + newFile.path + #console.log 'src', src, 'dst', dst + fs.stat src, (err, stats) -> + if err? + cb(err) + else if stats.isFile() + #console.log 'isFile: copying' + fse.copy src, dst, (err) -> + cb(err, newFile) + else + # other filetype - shouldn't happen + cb(new Error("output file is not a file"), file) + , (err, results) -> + if err? + callback err, outputFiles + else + callback(err, results) + + expireOutputFiles: (target, callback) -> + # look in target for build dirs and delete if > N or age of mod time > T + cacheDir = target + '/' + OutputCacheManager.CACHE_DIR + fs.readdir cacheDir, (err, results) -> + console.log 'CACHEDIR', results + callback(err) if callback? From 67bfeacab81af032791253996fd30385aa8c41bf Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Tue, 24 Feb 2015 14:40:22 +0000 Subject: [PATCH 02/14] skip the cache directory when finding output files --- app/coffee/OutputFileFinder.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/coffee/OutputFileFinder.coffee b/app/coffee/OutputFileFinder.coffee index fbfd38e..26f07cb 100644 --- a/app/coffee/OutputFileFinder.coffee +++ b/app/coffee/OutputFileFinder.coffee @@ -37,7 +37,7 @@ module.exports = OutputFileFinder = _callback(error, fileList) _callback = () -> - args = [directory, "-type", "f"] + args = [directory, "-name", ".cache", "-prune", "-o", "-type", "f", "-print"] logger.log args: args, "running find command" proc = spawn("find", args) From 163a33674b1472c887cf9f06de05a44211c30b78 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Tue, 24 Feb 2015 15:48:34 +0000 Subject: [PATCH 03/14] add an optimisation pass for the cached output files --- app/coffee/OutputCacheManager.coffee | 7 ++++++- app/coffee/OutputFileOptimiser.coffee | 30 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 app/coffee/OutputFileOptimiser.coffee diff --git a/app/coffee/OutputCacheManager.coffee b/app/coffee/OutputCacheManager.coffee index 0f96b11..479637d 100644 --- a/app/coffee/OutputCacheManager.coffee +++ b/app/coffee/OutputCacheManager.coffee @@ -5,12 +5,16 @@ Path = require "path" logger = require "logger-sharelatex" _ = require "underscore" +OutputFileOptimiser = require "./OutputFileOptimiser" + module.exports = OutputCacheManager = CACHE_DIR: '.cache/clsi' saveOutputFiles: (outputFiles, target, callback) -> # make a target/build_id directory and # copy all the output files into it + # + # TODO: use Path module buildId = 'build-' + Date.now() relDir = OutputCacheManager.CACHE_DIR + '/' + buildId newDir = target + '/' + relDir @@ -31,7 +35,8 @@ module.exports = OutputCacheManager = else if stats.isFile() #console.log 'isFile: copying' fse.copy src, dst, (err) -> - cb(err, newFile) + OutputFileOptimiser.optimiseFile src, dst, (err, result) -> + cb(err, newFile) else # other filetype - shouldn't happen cb(new Error("output file is not a file"), file) diff --git a/app/coffee/OutputFileOptimiser.coffee b/app/coffee/OutputFileOptimiser.coffee new file mode 100644 index 0000000..a00dc20 --- /dev/null +++ b/app/coffee/OutputFileOptimiser.coffee @@ -0,0 +1,30 @@ +fs = require "fs" +Path = require "path" +spawn = require("child_process").spawn +logger = require "logger-sharelatex" + +module.exports = OutputFileOptimiser = + + optimiseFile: (src, dst, callback = (error) ->) -> + if src.match(/\.pdf$/) + OutputFileOptimiser.optimisePDF src, dst, callback + else + callback (null) + + optimisePDF: (src, dst, callback = (error) ->) -> + tmpOutput = dst + '.opt' + args = ["--linearize", src, tmpOutput] + logger.log args: args, "running qpdf command" + + proc = spawn("qpdf", args) + stdout = "" + proc.stdout.on "data", (chunk) -> + stdout += chunk.toString() + proc.on "error", callback + proc.on "close", (code) -> + if code != 0 + logger.warn {directory, code}, "qpdf returned error" + return callback null + fs.rename tmpOutput, dst, (err) -> + # could log an error here + callback null From b8cdd4fa85efec7e9eacded5b376d897417b9a26 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Tue, 24 Feb 2015 16:09:55 +0000 Subject: [PATCH 04/14] added package dependencies for caching --- package.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index 1afcfea..58e6745 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,9 @@ "smoke-test-sharelatex": "git+https://github.com/sharelatex/smoke-test-sharelatex.git#v1.0.0", "sqlite3": "~2.2.0", "express": "^4.2.0", - "body-parser": "^1.2.0" + "body-parser": "^1.2.0", + "fs-extra": "^0.16.3", + "underscore": "^1.8.2" }, "devDependencies": { "mocha": "1.10.0", From 151ea99639f1cd63246c4b637fdd5c10d1f33362 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Wed, 25 Feb 2015 17:05:19 +0000 Subject: [PATCH 05/14] accept build id parameter when serving static files --- app.coffee | 24 ++++++++++++++++++++++-- app/coffee/CompileController.coffee | 1 + app/coffee/CompileManager.coffee | 2 +- app/coffee/OutputCacheManager.coffee | 7 ++++--- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/app.coffee b/app.coffee index 44d3157..dd2bae5 100644 --- a/app.coffee +++ b/app.coffee @@ -36,7 +36,24 @@ app.delete "/project/:project_id", CompileController.clearCache app.get "/project/:project_id/sync/code", CompileController.syncFromCode app.get "/project/:project_id/sync/pdf", CompileController.syncFromPdf -staticServer = express.static Settings.path.compilesDir, setHeaders: (res, path, stat) -> +url = require "url" + +staticForbidSymLinks = (root, options) -> + expressStatic = express.static root, options + basePath = Path.resolve(root) + return (req, res, next) -> + path = url.parse(req.url).pathname + requestedFsPath = Path.normalize("#{basePath}/#{path}") + fs.realpath requestedFsPath, (err, realFsPath)-> + if err? + return res.send(500) + else if requestedFsPath != realFsPath + logger.warn requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "trying to access a different file (symlink), aborting" + return res.send(404) + else + expressStatic(req, res, next) + +staticServer = staticForbidSymLinks Settings.path.compilesDir, setHeaders: (res, path, stat) -> if Path.basename(path) == "output.pdf" res.set("Content-Type", "application/pdf") # Calculate an etag in the same way as nginx @@ -51,7 +68,10 @@ staticServer = express.static Settings.path.compilesDir, setHeaders: (res, path, res.set("Content-Type", "text/plain") app.get "/project/:project_id/output/*", require("./app/js/SymlinkCheckerMiddlewear"), (req, res, next) -> - req.url = "/#{req.params.project_id}/#{req.params[0]}" + if req.query?.build? && req.query.build.match(/^[0-9]+$/) + req.url = "/#{req.params.project_id}/.cache/clsi/#{req.query.build}/#{req.params[0]}" + else + req.url = "/#{req.params.project_id}/#{req.params[0]}" staticServer(req, res, next) app.get "/status", (req, res, next) -> diff --git a/app/coffee/CompileController.coffee b/app/coffee/CompileController.coffee index c738202..29373c3 100644 --- a/app/coffee/CompileController.coffee +++ b/app/coffee/CompileController.coffee @@ -35,6 +35,7 @@ module.exports = CompileController = outputFiles: outputFiles.map (file) -> url: "#{Settings.apis.clsi.url}/project/#{request.project_id}/output/#{file.path}" type: file.type + build: file.build } clearCache: (req, res, next = (error) ->) -> diff --git a/app/coffee/CompileManager.coffee b/app/coffee/CompileManager.coffee index dbdb39f..ac0c241 100644 --- a/app/coffee/CompileManager.coffee +++ b/app/coffee/CompileManager.coffee @@ -33,7 +33,7 @@ module.exports = CompileManager = OutputFileFinder.findOutputFiles request.resources, compileDir, (error, outputFiles) -> return callback(error) if error? - OutputCacheManager.saveOutputFiles outputFiles, compileDir, (error, newOutputFiles) -> + OutputCacheManager.saveOutputFiles outputFiles, compileDir, (error, newOutputFiles) -> callback null, newOutputFiles clearProject: (project_id, _callback = (error) ->) -> diff --git a/app/coffee/OutputCacheManager.coffee b/app/coffee/OutputCacheManager.coffee index 479637d..c34a662 100644 --- a/app/coffee/OutputCacheManager.coffee +++ b/app/coffee/OutputCacheManager.coffee @@ -15,7 +15,7 @@ module.exports = OutputCacheManager = # copy all the output files into it # # TODO: use Path module - buildId = 'build-' + Date.now() + buildId = Date.now() relDir = OutputCacheManager.CACHE_DIR + '/' + buildId newDir = target + '/' + relDir OutputCacheManager.expireOutputFiles target @@ -25,9 +25,8 @@ module.exports = OutputCacheManager = else async.mapSeries outputFiles, (file, cb) -> newFile = _.clone(file) - newFile.path = relDir + '/' + file.path src = target + '/' + file.path - dst = target + '/' + newFile.path + dst = target + '/' + relDir + '/' + file.path #console.log 'src', src, 'dst', dst fs.stat src, (err, stats) -> if err? @@ -36,6 +35,8 @@ module.exports = OutputCacheManager = #console.log 'isFile: copying' fse.copy src, dst, (err) -> OutputFileOptimiser.optimiseFile src, dst, (err, result) -> + console.log 'setting buildId on', newFile, 'to', buildId + newFile.build = buildId cb(err, newFile) else # other filetype - shouldn't happen From e7ed8d786a2675dd35beae1696d48bf7a5f61356 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Thu, 26 Feb 2015 15:30:57 +0000 Subject: [PATCH 06/14] fix tests to allow for build parameter --- test/unit/coffee/CompileControllerTests.coffee | 3 +++ test/unit/coffee/CompileManagerTests.coffee | 14 +++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/test/unit/coffee/CompileControllerTests.coffee b/test/unit/coffee/CompileControllerTests.coffee index d2babf7..bc2d988 100644 --- a/test/unit/coffee/CompileControllerTests.coffee +++ b/test/unit/coffee/CompileControllerTests.coffee @@ -36,9 +36,11 @@ describe "CompileController", -> @output_files = [{ path: "output.pdf" type: "pdf" + build: 1234 }, { path: "output.log" type: "log" + build: 1234 }] @RequestParser.parse = sinon.stub().callsArgWith(1, null, @request) @ProjectPersistenceManager.markProjectAsJustAccessed = sinon.stub().callsArg(1) @@ -73,6 +75,7 @@ describe "CompileController", -> outputFiles: @output_files.map (file) => url: "#{@Settings.apis.clsi.url}/project/#{@project_id}/output/#{file.path}" type: file.type + build: file.build ) .should.equal true diff --git a/test/unit/coffee/CompileManagerTests.coffee b/test/unit/coffee/CompileManagerTests.coffee index 5d110eb..8923dea 100644 --- a/test/unit/coffee/CompileManagerTests.coffee +++ b/test/unit/coffee/CompileManagerTests.coffee @@ -12,6 +12,7 @@ describe "CompileManager", -> "./LatexRunner": @LatexRunner = {} "./ResourceWriter": @ResourceWriter = {} "./OutputFileFinder": @OutputFileFinder = {} + "./OutputCacheManager": @OutputCacheManager = {} "settings-sharelatex": @Settings = { path: compilesDir: "/compiles/dir" } "logger-sharelatex": @logger = { log: sinon.stub() } "child_process": @child_process = {} @@ -26,6 +27,15 @@ describe "CompileManager", -> path: "output.pdf" type: "pdf" }] + @build_files = [{ + path: "output.log" + type: "log" + build: 1234 + }, { + path: "output.pdf" + type: "pdf" + build: 1234 + }] @request = resources: @resources = "mock-resources" rootResourcePath: @rootResourcePath = "main.tex" @@ -37,6 +47,7 @@ describe "CompileManager", -> @ResourceWriter.syncResourcesToDisk = sinon.stub().callsArg(3) @LatexRunner.runLatex = sinon.stub().callsArg(2) @OutputFileFinder.findOutputFiles = sinon.stub().callsArgWith(2, null, @output_files) + @OutputCacheManager.saveOutputFiles = sinon.stub().callsArgWith(2, null, @build_files) @CompileManager.doCompile @request, @callback it "should write the resources to disk", -> @@ -60,7 +71,8 @@ describe "CompileManager", -> .should.equal true it "should return the output files", -> - @callback.calledWith(null, @output_files).should.equal true + console.log 'output_files', @build_files + @callback.calledWith(null, @build_files).should.equal true describe "clearProject", -> describe "succesfully", -> From 280d64cf6029fd8349c710bc5a4d9d88a1bdc2ed Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Thu, 26 Feb 2015 15:32:01 +0000 Subject: [PATCH 07/14] remove debugging code --- app/coffee/OutputCacheManager.coffee | 4 ---- 1 file changed, 4 deletions(-) diff --git a/app/coffee/OutputCacheManager.coffee b/app/coffee/OutputCacheManager.coffee index c34a662..c869bfd 100644 --- a/app/coffee/OutputCacheManager.coffee +++ b/app/coffee/OutputCacheManager.coffee @@ -27,15 +27,12 @@ module.exports = OutputCacheManager = newFile = _.clone(file) src = target + '/' + file.path dst = target + '/' + relDir + '/' + file.path - #console.log 'src', src, 'dst', dst fs.stat src, (err, stats) -> if err? cb(err) else if stats.isFile() - #console.log 'isFile: copying' fse.copy src, dst, (err) -> OutputFileOptimiser.optimiseFile src, dst, (err, result) -> - console.log 'setting buildId on', newFile, 'to', buildId newFile.build = buildId cb(err, newFile) else @@ -51,5 +48,4 @@ module.exports = OutputCacheManager = # look in target for build dirs and delete if > N or age of mod time > T cacheDir = target + '/' + OutputCacheManager.CACHE_DIR fs.readdir cacheDir, (err, results) -> - console.log 'CACHEDIR', results callback(err) if callback? From 198e1ef4928fbcf162795189918e6ed35227dd9b Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Fri, 27 Feb 2015 13:15:28 +0000 Subject: [PATCH 08/14] cleanup and logging --- app/coffee/OutputCacheManager.coffee | 115 +++++++++++++++++++------- app/coffee/OutputFileOptimiser.coffee | 11 ++- 2 files changed, 93 insertions(+), 33 deletions(-) diff --git a/app/coffee/OutputCacheManager.coffee b/app/coffee/OutputCacheManager.coffee index c869bfd..f77227a 100644 --- a/app/coffee/OutputCacheManager.coffee +++ b/app/coffee/OutputCacheManager.coffee @@ -8,44 +8,101 @@ _ = require "underscore" OutputFileOptimiser = require "./OutputFileOptimiser" module.exports = OutputCacheManager = - CACHE_DIR: '.cache/clsi' + CACHE_SUBDIR: '.cache/clsi' + BUILD_REGEX: /^[0-9a-f]+$/ # build id is Date.now() converted to hex + CACHE_LIMIT: 32 # maximum of 32 cache directories + CACHE_AGE: 60*60*1000 # up to one hour old - saveOutputFiles: (outputFiles, target, callback) -> - # make a target/build_id directory and + path: (buildId) -> + # used by static server, given build id return '.cache/clsi/buildId' + return Path.join(OutputCacheManager.CACHE_SUBDIR, buildId) + + saveOutputFiles: (outputFiles, compileDir, callback = (error) ->) -> + # make a compileDir/CACHE_SUBDIR/build_id directory and # copy all the output files into it - # - # TODO: use Path module - buildId = Date.now() - relDir = OutputCacheManager.CACHE_DIR + '/' + buildId - newDir = target + '/' + relDir - OutputCacheManager.expireOutputFiles target - fse.ensureDir newDir, (err) -> + cacheRoot = Path.join(compileDir, OutputCacheManager.CACHE_SUBDIR) + # Put the files into a new cache subdirectory + buildId = Date.now().toString(16) + cacheDir = Path.join(compileDir, OutputCacheManager.CACHE_SUBDIR, buildId) + # let file expiry run in the background + OutputCacheManager.expireOutputFiles cacheRoot, {keep: buildId} + + checkFile = (src, callback) -> + # check if we have a valid file to copy into the cache + fs.stat src, (err, stats) -> + if err? + # some problem reading the file + logger.error err: err, file: src, "stat error for file in cache" + callback(err) + else if not stats.isFile() + # other filetype - reject it + logger.error err: err, src: src, dst: dst, stat: stats, "nonfile output - refusing to copy to cache" + callback(new Error("output file is not a file"), file) + else + # it's a plain file, ok to copy + callback(null) + + copyFile = (src, dst, callback) -> + # copy output file into the cache + fse.copy src, dst, (err) -> + if err? + logger.error err: err, src: src, dst: dst, "copy error for file in cache" + callback(err) + else + # call the optimiser for the file too + OutputFileOptimiser.optimiseFile src, dst, callback + + # make the new cache directory + fse.ensureDir cacheDir, (err) -> if err? + logger.error err: err, directory: cacheDir, "error creating cache directory" callback(err, outputFiles) else + # copy all the output files into the new cache directory async.mapSeries outputFiles, (file, cb) -> newFile = _.clone(file) - src = target + '/' + file.path - dst = target + '/' + relDir + '/' + file.path - fs.stat src, (err, stats) -> - if err? - cb(err) - else if stats.isFile() - fse.copy src, dst, (err) -> - OutputFileOptimiser.optimiseFile src, dst, (err, result) -> - newFile.build = buildId - cb(err, newFile) - else - # other filetype - shouldn't happen - cb(new Error("output file is not a file"), file) + [src, dst] = [Path.join(compileDir, file.path), Path.join(cacheDir, file.path)] + checkFile src, (err) -> + copyFile src, dst, (err) -> + if not err? + newFile.build = buildId # attach a build id if we cached the file + cb(err, newFile) , (err, results) -> if err? - callback err, outputFiles + # pass back the original files if we encountered *any* error + callback(err, outputFiles) else + # pass back the list of new files in the cache callback(err, results) - expireOutputFiles: (target, callback) -> - # look in target for build dirs and delete if > N or age of mod time > T - cacheDir = target + '/' + OutputCacheManager.CACHE_DIR - fs.readdir cacheDir, (err, results) -> - callback(err) if callback? + expireOutputFiles: (cacheRoot, options, callback = (error) ->) -> + # look in compileDir for build dirs and delete if > N or age of mod time > T + fs.readdir cacheRoot, (err, results) -> + if err? + logger.error err: err, project_id: cacheRoot, "error clearing cache" + return callback(err) + + dirs = results.sort().reverse() + currentTime = Date.now() + + isExpired = (dir, index) -> + return false if options?.keep == dir + # remove any directories over the hard limit + return true if index > OutputCacheManager.CACHE_LIMIT + # we can get the build time from the directory name + dirTime = parseInt(dir, 16) + age = currentTime - dirTime + return age > OutputCacheManager.CACHE_AGE + + toRemove = _.filter(dirs, isExpired) + + removeDir = (dir, cb) -> + fse.remove Path.join(cacheRoot, dir), (err, result) -> + logger.log cache: cacheRoot, dir: dir, "removed expired cache dir" + if err? + logger.error err: err, dir: dir, "cache remove error" + cb(err, result) + + async.eachSeries toRemove, (dir, cb) -> + removeDir dir, cb + , callback diff --git a/app/coffee/OutputFileOptimiser.coffee b/app/coffee/OutputFileOptimiser.coffee index a00dc20..d1e45d4 100644 --- a/app/coffee/OutputFileOptimiser.coffee +++ b/app/coffee/OutputFileOptimiser.coffee @@ -6,6 +6,8 @@ logger = require "logger-sharelatex" module.exports = OutputFileOptimiser = optimiseFile: (src, dst, callback = (error) ->) -> + # check output file (src) and see if we can optimise it, storing + # the result in the build directory (dst) if src.match(/\.pdf$/) OutputFileOptimiser.optimisePDF src, dst, callback else @@ -19,12 +21,13 @@ module.exports = OutputFileOptimiser = proc = spawn("qpdf", args) stdout = "" proc.stdout.on "data", (chunk) -> - stdout += chunk.toString() - proc.on "error", callback + stdout += chunk.toString() + proc.on "error", callback proc.on "close", (code) -> if code != 0 logger.warn {directory, code}, "qpdf returned error" return callback null fs.rename tmpOutput, dst, (err) -> - # could log an error here - callback null + if err? + logger.warn {tmpOutput, dst}, "failed to rename output of qpdf command" + callback err From 0692e964ef4b45f06c32efcdc79a580b6f679386 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Fri, 27 Feb 2015 13:16:01 +0000 Subject: [PATCH 09/14] use OutputCacheManager to construct static path to files --- app.coffee | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app.coffee b/app.coffee index dd2bae5..ee2e838 100644 --- a/app.coffee +++ b/app.coffee @@ -12,6 +12,7 @@ Metrics.initialize("clsi") Metrics.open_sockets.monitor(logger) ProjectPersistenceManager = require "./app/js/ProjectPersistenceManager" +OutputCacheManager = require "./app/js/OutputCacheManager" require("./app/js/db").sync() @@ -68,8 +69,8 @@ staticServer = staticForbidSymLinks Settings.path.compilesDir, setHeaders: (res, res.set("Content-Type", "text/plain") app.get "/project/:project_id/output/*", require("./app/js/SymlinkCheckerMiddlewear"), (req, res, next) -> - if req.query?.build? && req.query.build.match(/^[0-9]+$/) - req.url = "/#{req.params.project_id}/.cache/clsi/#{req.query.build}/#{req.params[0]}" + if req.query?.build? && req.query.build.match(OutputCacheManager.BUILD_REGEX) + req.url = "/#{req.params.project_id}/" + OutputCacheManager.path(req.query.build) + "/#{req.params[0]}" else req.url = "/#{req.params.project_id}/#{req.params[0]}" staticServer(req, res, next) From 37cc9f3715bd677ce4c59f985807759cdbb7abbd Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Fri, 27 Feb 2015 13:57:57 +0000 Subject: [PATCH 10/14] provide a static server which forbids symlinks prevents mismatch between rootdir of server and rootdir of symlink checking middleware --- app.coffee | 27 ++++++-------------- app/coffee/OutputCacheManager.coffee | 8 ++++-- app/coffee/StaticServerForbidSymlinks.coffee | 24 +++++++++++++++++ app/coffee/SymlinkCheckerMiddlewear.coffee | 17 ------------ 4 files changed, 38 insertions(+), 38 deletions(-) create mode 100644 app/coffee/StaticServerForbidSymlinks.coffee delete mode 100644 app/coffee/SymlinkCheckerMiddlewear.coffee diff --git a/app.coffee b/app.coffee index ee2e838..9ac1d1d 100644 --- a/app.coffee +++ b/app.coffee @@ -37,24 +37,12 @@ app.delete "/project/:project_id", CompileController.clearCache app.get "/project/:project_id/sync/code", CompileController.syncFromCode app.get "/project/:project_id/sync/pdf", CompileController.syncFromPdf -url = require "url" +ForbidSymlinks = require "./app/js/StaticServerForbidSymlinks" -staticForbidSymLinks = (root, options) -> - expressStatic = express.static root, options - basePath = Path.resolve(root) - return (req, res, next) -> - path = url.parse(req.url).pathname - requestedFsPath = Path.normalize("#{basePath}/#{path}") - fs.realpath requestedFsPath, (err, realFsPath)-> - if err? - return res.send(500) - else if requestedFsPath != realFsPath - logger.warn requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "trying to access a different file (symlink), aborting" - return res.send(404) - else - expressStatic(req, res, next) - -staticServer = staticForbidSymLinks Settings.path.compilesDir, setHeaders: (res, path, stat) -> +# create a static server which does not allow access to any symlinks +# avoids possible mismatch of root directory between middleware check +# and serving the files +staticServer = ForbidSymlinks express.static, Settings.path.compilesDir, setHeaders: (res, path, stat) -> if Path.basename(path) == "output.pdf" res.set("Content-Type", "application/pdf") # Calculate an etag in the same way as nginx @@ -68,9 +56,10 @@ staticServer = staticForbidSymLinks Settings.path.compilesDir, setHeaders: (res, # that could be used in same-origin/XSS attacks. res.set("Content-Type", "text/plain") -app.get "/project/:project_id/output/*", require("./app/js/SymlinkCheckerMiddlewear"), (req, res, next) -> +app.get "/project/:project_id/output/*", (req, res, next) -> if req.query?.build? && req.query.build.match(OutputCacheManager.BUILD_REGEX) - req.url = "/#{req.params.project_id}/" + OutputCacheManager.path(req.query.build) + "/#{req.params[0]}" + # for specific build get the path from the OutputCacheManager (e.g. .clsi/buildId) + req.url = "/#{req.params.project_id}/" + OutputCacheManager.path(req.query.build, "/#{req.params[0]}") else req.url = "/#{req.params.project_id}/#{req.params[0]}" staticServer(req, res, next) diff --git a/app/coffee/OutputCacheManager.coffee b/app/coffee/OutputCacheManager.coffee index f77227a..50c5837 100644 --- a/app/coffee/OutputCacheManager.coffee +++ b/app/coffee/OutputCacheManager.coffee @@ -13,9 +13,13 @@ module.exports = OutputCacheManager = CACHE_LIMIT: 32 # maximum of 32 cache directories CACHE_AGE: 60*60*1000 # up to one hour old - path: (buildId) -> + path: (buildId, file) -> # used by static server, given build id return '.cache/clsi/buildId' - return Path.join(OutputCacheManager.CACHE_SUBDIR, buildId) + if buildId.match OutputCacheManager.BUILD_REGEX + return Path.join(OutputCacheManager.CACHE_SUBDIR, buildId, file) + else + # for invalid build id, return top level + return file saveOutputFiles: (outputFiles, compileDir, callback = (error) ->) -> # make a compileDir/CACHE_SUBDIR/build_id directory and diff --git a/app/coffee/StaticServerForbidSymlinks.coffee b/app/coffee/StaticServerForbidSymlinks.coffee new file mode 100644 index 0000000..2f48190 --- /dev/null +++ b/app/coffee/StaticServerForbidSymlinks.coffee @@ -0,0 +1,24 @@ +Path = require("path") +fs = require("fs") +Settings = require("settings-sharelatex") +logger = require("logger-sharelatex") +url = require "url" + +module.exports = ForbidSymlinks = (staticFn, root, options) -> + expressStatic = staticFn root, options + basePath = Path.resolve(root) + return (req, res, next) -> + path = url.parse(req.url)?.pathname + requestedFsPath = Path.normalize("#{basePath}/#{path}") + fs.realpath requestedFsPath, (err, realFsPath)-> + if err? + logger.warn err:err, requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "error checking file access" + if err.code == 'ENOENT' + return res.sendStatus(404) + else + return res.sendStatus(500) + else if requestedFsPath != realFsPath + logger.warn requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "trying to access a different file (symlink), aborting" + return res.sendStatus(404) + else + expressStatic(req, res, next) diff --git a/app/coffee/SymlinkCheckerMiddlewear.coffee b/app/coffee/SymlinkCheckerMiddlewear.coffee deleted file mode 100644 index b7baf1f..0000000 --- a/app/coffee/SymlinkCheckerMiddlewear.coffee +++ /dev/null @@ -1,17 +0,0 @@ -Path = require("path") -fs = require("fs") -Settings = require("settings-sharelatex") -logger = require("logger-sharelatex") - - -module.exports = (req, res, next)-> - basePath = Path.resolve("#{Settings.path.compilesDir}/#{req.params.project_id}") - requestedFsPath = Path.normalize("#{basePath}/#{req.params[0]}") - fs.realpath requestedFsPath, (err, realFsPath)-> - if err? - return res.send(500) - else if requestedFsPath != realFsPath - logger.warn requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "trying to access a different file (symlink), aborting" - return res.send(404) - else - return next() From 916b4cb40b1dd34d68b2ee19d799152a1404ddbb Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Fri, 27 Feb 2015 15:38:57 +0000 Subject: [PATCH 11/14] move convert tests from middleware to restricted static server --- .../StaticServerForbidSymlinksTests.coffee | 82 +++++++++++++++++++ .../SymlinkCheckerMiddlewearTests.coffee | 60 -------------- 2 files changed, 82 insertions(+), 60 deletions(-) create mode 100644 test/unit/coffee/StaticServerForbidSymlinksTests.coffee delete mode 100644 test/unit/coffee/SymlinkCheckerMiddlewearTests.coffee diff --git a/test/unit/coffee/StaticServerForbidSymlinksTests.coffee b/test/unit/coffee/StaticServerForbidSymlinksTests.coffee new file mode 100644 index 0000000..e17bace --- /dev/null +++ b/test/unit/coffee/StaticServerForbidSymlinksTests.coffee @@ -0,0 +1,82 @@ +should = require('chai').should() +SandboxedModule = require('sandboxed-module') +assert = require('assert') +path = require('path') +sinon = require('sinon') +modulePath = path.join __dirname, "../../../app/js/StaticServerForbidSymlinks" +expect = require("chai").expect + +describe "StaticServerForbidSymlinks", -> + + beforeEach -> + + @settings = + path: + compilesDir: "/compiles/here" + + @fs = {} + @ForbidSymlinks = SandboxedModule.require modulePath, requires: + "settings-sharelatex":@settings + "logger-sharelatex": + log:-> + warn:-> + "fs":@fs + + @dummyStatic = (rootDir, options) -> + return (req, res, next) -> + # console.log "dummyStatic serving file", rootDir, "called with", req.url + # serve it + next() + + @StaticServerForbidSymlinks = @ForbidSymlinks @dummyStatic, @settings.path.compilesDir + @req = + params: + project_id:"12345" + + @res = {} + @req.url = "/12345/output.pdf" + + + describe "sending a normal file through", -> + beforeEach -> + @fs.realpath = sinon.stub().callsArgWith(1, null, "#{@settings.path.compilesDir}/#{@req.params.project_id}/output.pdf") + + it "should call next", (done)-> + @res.sendStatus = (resCode)-> + resCode.should.equal 200 + done() + @StaticServerForbidSymlinks @req, @res, done + + + describe "with a missing file", -> + beforeEach -> + @fs.realpath = sinon.stub().callsArgWith(1, {code: 'ENOENT'}, "#{@settings.path.compilesDir}/#{@req.params.project_id}/unknown.pdf") + + it "should send a 404", (done)-> + @res.sendStatus = (resCode)-> + resCode.should.equal 404 + done() + @StaticServerForbidSymlinks @req, @res + + + describe "with a symlink file", -> + beforeEach -> + @fs.realpath = sinon.stub().callsArgWith(1, null, "/etc/#{@req.params.project_id}/output.pdf") + + it "should send a 404", (done)-> + @res.sendStatus = (resCode)-> + resCode.should.equal 404 + done() + @StaticServerForbidSymlinks @req, @res + + describe "with an error from fs.realpath", -> + + beforeEach -> + @fs.realpath = sinon.stub().callsArgWith(1, "error") + + it "should send a 500", (done)-> + @res.sendStatus = (resCode)-> + resCode.should.equal 500 + done() + @StaticServerForbidSymlinks @req, @res + diff --git a/test/unit/coffee/SymlinkCheckerMiddlewearTests.coffee b/test/unit/coffee/SymlinkCheckerMiddlewearTests.coffee deleted file mode 100644 index 50bd427..0000000 --- a/test/unit/coffee/SymlinkCheckerMiddlewearTests.coffee +++ /dev/null @@ -1,60 +0,0 @@ -should = require('chai').should() -SandboxedModule = require('sandboxed-module') -assert = require('assert') -path = require('path') -sinon = require('sinon') -modulePath = path.join __dirname, "../../../app/js/SymlinkCheckerMiddlewear" -expect = require("chai").expect - -describe "SymlinkCheckerMiddlewear", -> - - beforeEach -> - - @settings = - path: - compilesDir: "/compiles/here" - - @fs = {} - @SymlinkCheckerMiddlewear = SandboxedModule.require modulePath, requires: - "settings-sharelatex":@settings - "logger-sharelatex": - log:-> - warn:-> - "fs":@fs - @req = - params: - project_id:"12345" - - @res = {} - @req.params[0]= "output.pdf" - - - describe "sending a normal file through", -> - beforeEach -> - @fs.realpath = sinon.stub().callsArgWith(1, null, "#{@settings.path.compilesDir}/#{@req.params.project_id}/output.pdf") - - it "should call next", (done)-> - @SymlinkCheckerMiddlewear @req, @res, done - - - describe "with a symlink file", -> - beforeEach -> - @fs.realpath = sinon.stub().callsArgWith(1, null, "/etc/#{@req.params.project_id}/output.pdf") - - it "should send a 404", (done)-> - @res.send = (resCode)-> - resCode.should.equal 404 - done() - @SymlinkCheckerMiddlewear @req, @res - - describe "with an error from fs.realpath", -> - - beforeEach -> - @fs.realpath = sinon.stub().callsArgWith(1, "error") - - it "should send a 500", (done)-> - @res.send = (resCode)-> - resCode.should.equal 500 - done() - @SymlinkCheckerMiddlewear @req, @res - From 3a4dd9df508b88876ba9d0be8c8eda920d125c98 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Fri, 27 Feb 2015 16:07:02 +0000 Subject: [PATCH 12/14] fix double callback for proc.on 'error' and proc.on 'close' --- app/coffee/OutputFileOptimiser.coffee | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/app/coffee/OutputFileOptimiser.coffee b/app/coffee/OutputFileOptimiser.coffee index d1e45d4..d0c091d 100644 --- a/app/coffee/OutputFileOptimiser.coffee +++ b/app/coffee/OutputFileOptimiser.coffee @@ -2,6 +2,7 @@ fs = require "fs" Path = require "path" spawn = require("child_process").spawn logger = require "logger-sharelatex" +_ = require "underscore" module.exports = OutputFileOptimiser = @@ -22,12 +23,15 @@ module.exports = OutputFileOptimiser = stdout = "" proc.stdout.on "data", (chunk) -> stdout += chunk.toString() - proc.on "error", callback + callback = _.once(callback) # avoid double call back for error and close event + proc.on "error", (err) -> + logger.warn {err, args}, "qpdf failed" + callback(null) # ignore the error proc.on "close", (code) -> if code != 0 - logger.warn {directory, code}, "qpdf returned error" - return callback null + logger.warn {code, args}, "qpdf returned error" + return callback(null) # ignore the error fs.rename tmpOutput, dst, (err) -> if err? logger.warn {tmpOutput, dst}, "failed to rename output of qpdf command" - callback err + callback(null) # ignore the error From 75ef0d6581a3d02e6f4cce96a39ba72a4964ebf3 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Mon, 2 Mar 2015 09:58:20 +0000 Subject: [PATCH 13/14] skip cache directory error when empty --- app/coffee/OutputCacheManager.coffee | 1 + 1 file changed, 1 insertion(+) diff --git a/app/coffee/OutputCacheManager.coffee b/app/coffee/OutputCacheManager.coffee index 50c5837..af4ce61 100644 --- a/app/coffee/OutputCacheManager.coffee +++ b/app/coffee/OutputCacheManager.coffee @@ -83,6 +83,7 @@ module.exports = OutputCacheManager = # look in compileDir for build dirs and delete if > N or age of mod time > T fs.readdir cacheRoot, (err, results) -> if err? + return callback(null) if err.code == 'ENOENT' # cache directory is empty logger.error err: err, project_id: cacheRoot, "error clearing cache" return callback(err) From 7551bc3135759bd70f42c1180aa6a962ec2c6fc9 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Mon, 2 Mar 2015 11:31:48 +0000 Subject: [PATCH 14/14] reduce cache limit for pdfs --- app/coffee/OutputCacheManager.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/coffee/OutputCacheManager.coffee b/app/coffee/OutputCacheManager.coffee index af4ce61..66abc4e 100644 --- a/app/coffee/OutputCacheManager.coffee +++ b/app/coffee/OutputCacheManager.coffee @@ -10,7 +10,7 @@ OutputFileOptimiser = require "./OutputFileOptimiser" module.exports = OutputCacheManager = CACHE_SUBDIR: '.cache/clsi' BUILD_REGEX: /^[0-9a-f]+$/ # build id is Date.now() converted to hex - CACHE_LIMIT: 32 # maximum of 32 cache directories + CACHE_LIMIT: 2 # maximum number of cache directories CACHE_AGE: 60*60*1000 # up to one hour old path: (buildId, file) ->