21 Commits

Author SHA1 Message Date
James Allen
a35df6d829 Release version 0.1.4 2015-03-20 15:25:02 +00:00
Brian Gough
1a794d804a Merge branch 'master' of github.com:sharelatex/clsi-sharelatex 2015-03-16 16:47:56 +00:00
Brian Gough
24e20a79f4 remove unnecessary call to async.series in OutputFileFinder
callback was previously async but is now synchronous, so high stack
usage.
2015-03-16 16:47:25 +00:00
Brian Gough
c47f49e24b Merge pull request #17 from sharelatex/add-v8-performance
add v8 profiler
2015-03-16 15:18:48 +00:00
Brian Gough
65f2f23cf6 add v8 profiler on /profile?time=MS url 2015-03-16 15:02:45 +00:00
Brian Gough
8d7d637eed Merge pull request #16 from sharelatex/cache-output-files
Keep output files in cache
2015-03-02 12:06:06 +00:00
Brian Gough
7551bc3135 reduce cache limit for pdfs 2015-03-02 11:31:48 +00:00
Brian Gough
75ef0d6581 skip cache directory error when empty 2015-03-02 09:58:20 +00:00
Brian Gough
31f62c7a7b Merge branch 'master' into cache-output-files 2015-03-02 09:18:44 +00:00
Brian Gough
3a4dd9df50 fix double callback for proc.on 'error' and proc.on 'close' 2015-02-27 16:07:02 +00:00
Brian Gough
916b4cb40b move convert tests from middleware to restricted static server 2015-02-27 15:38:57 +00:00
Brian Gough
37cc9f3715 provide a static server which forbids symlinks
prevents mismatch between rootdir of server and rootdir of symlink
checking middleware
2015-02-27 13:57:57 +00:00
Brian Gough
0692e964ef use OutputCacheManager to construct static path to files 2015-02-27 13:16:01 +00:00
Brian Gough
198e1ef492 cleanup and logging 2015-02-27 13:15:35 +00:00
Brian Gough
280d64cf60 remove debugging code 2015-02-26 15:32:01 +00:00
Brian Gough
e7ed8d786a fix tests to allow for build parameter 2015-02-26 15:31:12 +00:00
Brian Gough
151ea99639 accept build id parameter when serving static files 2015-02-25 17:05:19 +00:00
Brian Gough
b8cdd4fa85 added package dependencies for caching 2015-02-24 16:09:55 +00:00
Brian Gough
163a33674b add an optimisation pass for the cached output files 2015-02-24 15:48:34 +00:00
Brian Gough
67bfeacab8 skip the cache directory when finding output files 2015-02-24 14:40:22 +00:00
Brian Gough
1923352e66 save output files in a .cache directory 2015-02-24 14:40:05 +00:00
13 changed files with 310 additions and 99 deletions

View File

@@ -12,6 +12,7 @@ Metrics.initialize("clsi")
Metrics.open_sockets.monitor(logger) Metrics.open_sockets.monitor(logger)
ProjectPersistenceManager = require "./app/js/ProjectPersistenceManager" ProjectPersistenceManager = require "./app/js/ProjectPersistenceManager"
OutputCacheManager = require "./app/js/OutputCacheManager"
require("./app/js/db").sync() require("./app/js/db").sync()
@@ -36,7 +37,12 @@ app.delete "/project/:project_id", CompileController.clearCache
app.get "/project/:project_id/sync/code", CompileController.syncFromCode app.get "/project/:project_id/sync/code", CompileController.syncFromCode
app.get "/project/:project_id/sync/pdf", CompileController.syncFromPdf app.get "/project/:project_id/sync/pdf", CompileController.syncFromPdf
staticServer = express.static Settings.path.compilesDir, setHeaders: (res, path, stat) -> ForbidSymlinks = require "./app/js/StaticServerForbidSymlinks"
# create a static server which does not allow access to any symlinks
# avoids possible mismatch of root directory between middleware check
# and serving the files
staticServer = ForbidSymlinks express.static, Settings.path.compilesDir, setHeaders: (res, path, stat) ->
if Path.basename(path) == "output.pdf" if Path.basename(path) == "output.pdf"
res.set("Content-Type", "application/pdf") res.set("Content-Type", "application/pdf")
# Calculate an etag in the same way as nginx # Calculate an etag in the same way as nginx
@@ -50,7 +56,11 @@ staticServer = express.static Settings.path.compilesDir, setHeaders: (res, path,
# that could be used in same-origin/XSS attacks. # that could be used in same-origin/XSS attacks.
res.set("Content-Type", "text/plain") res.set("Content-Type", "text/plain")
app.get "/project/:project_id/output/*", require("./app/js/SymlinkCheckerMiddlewear"), (req, res, next) -> app.get "/project/:project_id/output/*", (req, res, next) ->
if req.query?.build? && req.query.build.match(OutputCacheManager.BUILD_REGEX)
# for specific build get the path from the OutputCacheManager (e.g. .cache/clsi/buildId)
req.url = "/#{req.params.project_id}/" + OutputCacheManager.path(req.query.build, "/#{req.params[0]}")
else
req.url = "/#{req.params.project_id}/#{req.params[0]}" req.url = "/#{req.params.project_id}/#{req.params[0]}"
staticServer(req, res, next) staticServer(req, res, next)
@@ -76,6 +86,15 @@ app.get "/health_check", (req, res)->
res.contentType(resCacher?.setContentType) res.contentType(resCacher?.setContentType)
res.send resCacher?.code, resCacher?.body res.send resCacher?.code, resCacher?.body
profiler = require "v8-profiler"
# GET /profile?time=MS
# Runs the v8 CPU profiler for `time` milliseconds (default 1000) and
# returns the collected profile as JSON.
# NOTE(review): the profile name "test" is shared, so overlapping requests
# will interfere with each other - confirm this endpoint is only used manually.
app.get "/profile", (req, res) ->
	# always parse as base 10; a missing, non-numeric or negative value
	# falls back to the default instead of passing NaN to setTimeout
	time = parseInt(req.query.time, 10)
	time = 1000 unless time >= 0
	profiler.startProfiling("test")
	setTimeout () ->
		profile = profiler.stopProfiling("test")
		res.json(profile)
	, time
app.use (error, req, res, next) -> app.use (error, req, res, next) ->
logger.error err: error, "server error" logger.error err: error, "server error"
res.send error?.statusCode || 500 res.send error?.statusCode || 500

View File

@@ -35,6 +35,7 @@ module.exports = CompileController =
outputFiles: outputFiles.map (file) -> outputFiles: outputFiles.map (file) ->
url: "#{Settings.apis.clsi.url}/project/#{request.project_id}/output/#{file.path}" url: "#{Settings.apis.clsi.url}/project/#{request.project_id}/output/#{file.path}"
type: file.type type: file.type
build: file.build
} }
clearCache: (req, res, next = (error) ->) -> clearCache: (req, res, next = (error) ->) ->

View File

@@ -1,6 +1,7 @@
ResourceWriter = require "./ResourceWriter" ResourceWriter = require "./ResourceWriter"
LatexRunner = require "./LatexRunner" LatexRunner = require "./LatexRunner"
OutputFileFinder = require "./OutputFileFinder" OutputFileFinder = require "./OutputFileFinder"
OutputCacheManager = require "./OutputCacheManager"
Settings = require("settings-sharelatex") Settings = require("settings-sharelatex")
Path = require "path" Path = require "path"
logger = require "logger-sharelatex" logger = require "logger-sharelatex"
@@ -32,7 +33,8 @@ module.exports = CompileManager =
OutputFileFinder.findOutputFiles request.resources, compileDir, (error, outputFiles) -> OutputFileFinder.findOutputFiles request.resources, compileDir, (error, outputFiles) ->
return callback(error) if error? return callback(error) if error?
callback null, outputFiles OutputCacheManager.saveOutputFiles outputFiles, compileDir, (error, newOutputFiles) ->
callback null, newOutputFiles
clearProject: (project_id, _callback = (error) ->) -> clearProject: (project_id, _callback = (error) ->) ->
callback = (error) -> callback = (error) ->

View File

@@ -0,0 +1,113 @@
async = require "async"
fs = require "fs"
fse = require "fs-extra"
Path = require "path"
logger = require "logger-sharelatex"
_ = require "underscore"
OutputFileOptimiser = require "./OutputFileOptimiser"
# Keeps a copy of the output files of each compile in a per-build
# directory (compileDir/.cache/clsi/<buildId>) so that a specific build can
# still be served after a later compile has overwritten the top-level files.
module.exports = OutputCacheManager =
	CACHE_SUBDIR: '.cache/clsi'
	BUILD_REGEX: /^[0-9a-f]+$/ # build id is Date.now() converted to hex
	CACHE_LIMIT: 2 # maximum number of cache directories
	CACHE_AGE: 60*60*1000 # up to one hour old

	# Return the path (relative to the project's compile directory) of `file`
	# inside the cache directory for `buildId`. An invalid build id yields
	# `file` unchanged, i.e. the top-level copy.
	path: (buildId, file) ->
		# used by static server, given build id return '.cache/clsi/buildId'
		if buildId.match OutputCacheManager.BUILD_REGEX
			return Path.join(OutputCacheManager.CACHE_SUBDIR, buildId, file)
		else
			# for invalid build id, return top level
			return file

	# Copy every entry of `outputFiles` from `compileDir` into a new
	# compileDir/CACHE_SUBDIR/<buildId> directory.
	# Calls back with (error, files): on success `files` are clones of the
	# input entries with a `build` property attached; on *any* error the
	# original `outputFiles` list is passed back untouched.
	saveOutputFiles: (outputFiles, compileDir, callback = (error) ->) ->
		cacheRoot = Path.join(compileDir, OutputCacheManager.CACHE_SUBDIR)
		# Put the files into a new cache subdirectory
		buildId = Date.now().toString(16)
		cacheDir = Path.join(cacheRoot, buildId)
		# let file expiry run in the background
		OutputCacheManager.expireOutputFiles cacheRoot, {keep: buildId}

		checkFile = (src, callback) ->
			# check if we have a valid file to copy into the cache
			fs.stat src, (err, stats) ->
				if err?
					# some problem reading the file
					logger.error err: err, file: src, "stat error for file in cache"
					callback(err)
				else if not stats.isFile()
					# other filetype - reject it
					# (only names that exist in this scope are logged; the
					# destination path is not known here)
					logger.error src: src, stat: stats, "nonfile output - refusing to copy to cache"
					callback(new Error("output file is not a file"))
				else
					# it's a plain file, ok to copy
					callback(null)

		copyFile = (src, dst, callback) ->
			# copy output file into the cache
			fse.copy src, dst, (err) ->
				if err?
					logger.error err: err, src: src, dst: dst, "copy error for file in cache"
					callback(err)
				else
					# call the optimiser for the file too
					OutputFileOptimiser.optimiseFile src, dst, callback

		# make the new cache directory
		fse.ensureDir cacheDir, (err) ->
			if err?
				logger.error err: err, directory: cacheDir, "error creating cache directory"
				callback(err, outputFiles)
			else
				# copy all the output files into the new cache directory
				async.mapSeries outputFiles, (file, cb) ->
					newFile = _.clone(file)
					src = Path.join(compileDir, file.path)
					dst = Path.join(cacheDir, file.path)
					checkFile src, (err) ->
						# never copy something that failed the check
						return cb(err, newFile) if err?
						copyFile src, dst, (err) ->
							if not err?
								newFile.build = buildId # attach a build id if we cached the file
							cb(err, newFile)
				, (err, results) ->
					if err?
						# pass back the original files if we encountered *any* error
						callback(err, outputFiles)
					else
						# pass back the list of new files in the cache
						callback(err, results)

	# Remove build directories under `cacheRoot` which are over the hard
	# limit or older than CACHE_AGE. The directory named by `options.keep`
	# is never removed. A missing cache root is not treated as an error.
	expireOutputFiles: (cacheRoot, options, callback = (error) ->) ->
		# look in compileDir for build dirs and delete if > N or age of mod time > T
		fs.readdir cacheRoot, (err, results) ->
			if err?
				return callback(null) if err.code == 'ENOENT' # cache directory is empty
				logger.error err: err, project_id: cacheRoot, "error clearing cache"
				return callback(err)

			# build ids are hex timestamps, so reverse sorting puts newest first
			dirs = results.sort().reverse()
			currentTime = Date.now()

			isExpired = (dir, index) ->
				return false if options?.keep == dir
				# remove any directories over the hard limit
				# NOTE(review): `>` keeps indexes 0..CACHE_LIMIT, i.e. one more
				# directory than CACHE_LIMIT - confirm this is intended
				return true if index > OutputCacheManager.CACHE_LIMIT
				# we can get the build time from the directory name
				dirTime = parseInt(dir, 16)
				age = currentTime - dirTime
				return age > OutputCacheManager.CACHE_AGE

			toRemove = _.filter(dirs, isExpired)

			removeDir = (dir, cb) ->
				fse.remove Path.join(cacheRoot, dir), (err, result) ->
					if err?
						logger.error err: err, dir: dir, "cache remove error"
					else
						# only report success when the removal actually worked
						logger.log cache: cacheRoot, dir: dir, "removed expired cache dir"
					cb(err, result)

			async.eachSeries toRemove, (dir, cb) ->
				removeDir dir, cb
			, callback

View File

@@ -17,19 +17,11 @@ module.exports = OutputFileFinder =
jobs = [] jobs = []
outputFiles = [] outputFiles = []
for file in allFiles for file in allFiles
do (file) -> if !incomingResources[file]
jobs.push (callback) ->
if incomingResources[file]
return callback()
else
outputFiles.push { outputFiles.push {
path: file path: file
type: file.match(/\.([^\.]+)$/)?[1] type: file.match(/\.([^\.]+)$/)?[1]
} }
callback()
async.series jobs, (error) ->
return callback(error) if error?
callback null, outputFiles callback null, outputFiles
_getAllFiles: (directory, _callback = (error, fileList) ->) -> _getAllFiles: (directory, _callback = (error, fileList) ->) ->
@@ -37,7 +29,7 @@ module.exports = OutputFileFinder =
_callback(error, fileList) _callback(error, fileList)
_callback = () -> _callback = () ->
args = [directory, "-type", "f"] args = [directory, "-name", ".cache", "-prune", "-o", "-type", "f", "-print"]
logger.log args: args, "running find command" logger.log args: args, "running find command"
proc = spawn("find", args) proc = spawn("find", args)

View File

@@ -0,0 +1,37 @@
fs = require "fs"
Path = require "path"
spawn = require("child_process").spawn
logger = require "logger-sharelatex"
_ = require "underscore"
# Post-processes files that have been copied into the output cache.
# Optimisation is best-effort: failures are logged and never passed back.
module.exports = OutputFileOptimiser =

	# Optimise the cached copy `dst` of the output file `src` if we know how
	# to (currently only PDFs); otherwise call back immediately.
	optimiseFile: (src, dst, callback = (error) ->) ->
		# check output file (src) and see if we can optimise it, storing
		# the result in the build directory (dst)
		if src.match(/\.pdf$/)
			OutputFileOptimiser.optimisePDF src, dst, callback
		else
			callback (null)

	# Run `qpdf --linearize` on `src`, writing to a temporary file next to
	# `dst` and renaming it over `dst` when qpdf exits with code 0.
	# Always calls back with null, exactly once.
	optimisePDF: (src, dst, callback = (error) ->) ->
		tmpOutput = dst + '.opt'
		args = ["--linearize", src, tmpOutput]
		logger.log args: args, "running qpdf command"

		proc = spawn("qpdf", args)
		# consume both output pipes so that qpdf can never block on a full
		# pipe; the content itself is not needed
		proc.stdout.on "data", (chunk) ->
		proc.stderr.on "data", (chunk) ->
		callback = _.once(callback) # avoid double call back for error and close event
		proc.on "error", (err) ->
			logger.warn {err, args}, "qpdf failed"
			callback(null) # ignore the error
		proc.on "close", (code) ->
			if code != 0
				logger.warn {code, args}, "qpdf returned error"
				return callback(null) # ignore the error
			fs.rename tmpOutput, dst, (err) ->
				if err?
					logger.warn {err, tmpOutput, dst}, "failed to rename output of qpdf command"
				callback(null) # ignore the error

View File

@@ -0,0 +1,24 @@
Path = require("path")
fs = require("fs")
Settings = require("settings-sharelatex")
logger = require("logger-sharelatex")
url = require "url"
# Wrap a static file server so that it refuses to serve anything reached
# through a symlink.
#   staticFn - factory such as express.static, called as staticFn(root, options)
#   root     - directory the files are served from
#   options  - passed through to staticFn
# Returns a (req, res, next) middleware. Requests whose real path differs
# from the requested path, or whose path is missing or invalid, get a 404;
# other errors from fs.realpath get a 500.
module.exports = ForbidSymlinks = (staticFn, root, options) ->
	expressStatic = staticFn root, options
	basePath = Path.resolve(root)
	return (req, res, next) ->
		rawPath = url.parse(req.url)?.pathname
		# The static server percent-decodes the path before touching the
		# filesystem, so the check has to look at the decoded path too -
		# otherwise a different file is checked from the one that is served.
		try
			path = decodeURIComponent(rawPath) if rawPath?
		catch decodeError
			path = null
		if not path? or path.indexOf("\0") != -1
			logger.warn url: req.url, project_id: req.params.project_id, "invalid path in file request, aborting"
			return res.sendStatus(404)
		requestedFsPath = Path.normalize("#{basePath}/#{path}")
		fs.realpath requestedFsPath, (err, realFsPath)->
			if err?
				logger.warn err:err, requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "error checking file access"
				if err.code == 'ENOENT'
					return res.sendStatus(404)
				else
					return res.sendStatus(500)
			else if requestedFsPath != realFsPath
				logger.warn requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "trying to access a different file (symlink), aborting"
				return res.sendStatus(404)
			else
				expressStatic(req, res, next)

View File

@@ -1,17 +0,0 @@
Path = require("path")
fs = require("fs")
Settings = require("settings-sharelatex")
logger = require("logger-sharelatex")
# Express middleware: only lets the request through to the next handler
# when the requested output file resolves to itself, i.e. no symlink is
# involved anywhere in its path. Symlinks get a 404, realpath errors a 500.
module.exports = (req, res, next)->
	projectDir = Path.resolve("#{Settings.path.compilesDir}/#{req.params.project_id}")
	requestedFsPath = Path.normalize("#{projectDir}/#{req.params[0]}")
	fs.realpath requestedFsPath, (err, realFsPath)->
		# any failure to resolve the path is reported as a server error
		return res.send(500) if err?
		# a plain file resolves to exactly the path that was asked for
		return next() if requestedFsPath == realFsPath
		logger.warn requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "trying to access a different file (symlink), aborting"
		return res.send(404)

View File

@@ -1,7 +1,7 @@
{ {
"name": "node-clsi", "name": "node-clsi",
"description": "A Node.js implementation of the CLSI LaTeX web-API", "description": "A Node.js implementation of the CLSI LaTeX web-API",
"version": "0.1.3", "version": "0.1.4",
"repository": { "repository": {
"type": "git", "type": "git",
"url": "https://github.com/sharelatex/clsi-sharelatex.git" "url": "https://github.com/sharelatex/clsi-sharelatex.git"
@@ -21,7 +21,10 @@
"smoke-test-sharelatex": "git+https://github.com/sharelatex/smoke-test-sharelatex.git#v1.0.0", "smoke-test-sharelatex": "git+https://github.com/sharelatex/smoke-test-sharelatex.git#v1.0.0",
"sqlite3": "~2.2.0", "sqlite3": "~2.2.0",
"express": "^4.2.0", "express": "^4.2.0",
"body-parser": "^1.2.0" "body-parser": "^1.2.0",
"fs-extra": "^0.16.3",
"underscore": "^1.8.2",
"v8-profiler": "^5.2.4"
}, },
"devDependencies": { "devDependencies": {
"mocha": "1.10.0", "mocha": "1.10.0",

View File

@@ -36,9 +36,11 @@ describe "CompileController", ->
@output_files = [{ @output_files = [{
path: "output.pdf" path: "output.pdf"
type: "pdf" type: "pdf"
build: 1234
}, { }, {
path: "output.log" path: "output.log"
type: "log" type: "log"
build: 1234
}] }]
@RequestParser.parse = sinon.stub().callsArgWith(1, null, @request) @RequestParser.parse = sinon.stub().callsArgWith(1, null, @request)
@ProjectPersistenceManager.markProjectAsJustAccessed = sinon.stub().callsArg(1) @ProjectPersistenceManager.markProjectAsJustAccessed = sinon.stub().callsArg(1)
@@ -73,6 +75,7 @@ describe "CompileController", ->
outputFiles: @output_files.map (file) => outputFiles: @output_files.map (file) =>
url: "#{@Settings.apis.clsi.url}/project/#{@project_id}/output/#{file.path}" url: "#{@Settings.apis.clsi.url}/project/#{@project_id}/output/#{file.path}"
type: file.type type: file.type
build: file.build
) )
.should.equal true .should.equal true

View File

@@ -12,6 +12,7 @@ describe "CompileManager", ->
"./LatexRunner": @LatexRunner = {} "./LatexRunner": @LatexRunner = {}
"./ResourceWriter": @ResourceWriter = {} "./ResourceWriter": @ResourceWriter = {}
"./OutputFileFinder": @OutputFileFinder = {} "./OutputFileFinder": @OutputFileFinder = {}
"./OutputCacheManager": @OutputCacheManager = {}
"settings-sharelatex": @Settings = { path: compilesDir: "/compiles/dir" } "settings-sharelatex": @Settings = { path: compilesDir: "/compiles/dir" }
"logger-sharelatex": @logger = { log: sinon.stub() } "logger-sharelatex": @logger = { log: sinon.stub() }
"child_process": @child_process = {} "child_process": @child_process = {}
@@ -26,6 +27,15 @@ describe "CompileManager", ->
path: "output.pdf" path: "output.pdf"
type: "pdf" type: "pdf"
}] }]
@build_files = [{
path: "output.log"
type: "log"
build: 1234
}, {
path: "output.pdf"
type: "pdf"
build: 1234
}]
@request = @request =
resources: @resources = "mock-resources" resources: @resources = "mock-resources"
rootResourcePath: @rootResourcePath = "main.tex" rootResourcePath: @rootResourcePath = "main.tex"
@@ -37,6 +47,7 @@ describe "CompileManager", ->
@ResourceWriter.syncResourcesToDisk = sinon.stub().callsArg(3) @ResourceWriter.syncResourcesToDisk = sinon.stub().callsArg(3)
@LatexRunner.runLatex = sinon.stub().callsArg(2) @LatexRunner.runLatex = sinon.stub().callsArg(2)
@OutputFileFinder.findOutputFiles = sinon.stub().callsArgWith(2, null, @output_files) @OutputFileFinder.findOutputFiles = sinon.stub().callsArgWith(2, null, @output_files)
@OutputCacheManager.saveOutputFiles = sinon.stub().callsArgWith(2, null, @build_files)
@CompileManager.doCompile @request, @callback @CompileManager.doCompile @request, @callback
it "should write the resources to disk", -> it "should write the resources to disk", ->
@@ -60,7 +71,8 @@ describe "CompileManager", ->
.should.equal true .should.equal true
it "should return the output files", -> it "should return the output files", ->
@callback.calledWith(null, @output_files).should.equal true console.log 'output_files', @build_files
@callback.calledWith(null, @build_files).should.equal true
describe "clearProject", -> describe "clearProject", ->
describe "succesfully", -> describe "succesfully", ->

View File

@@ -0,0 +1,82 @@
should = require('chai').should()
SandboxedModule = require('sandboxed-module')
assert = require('assert')
path = require('path')
sinon = require('sinon')
modulePath = path.join __dirname, "../../../app/js/StaticServerForbidSymlinks"
expect = require("chai").expect
describe "StaticServerForbidSymlinks", ->

	# Install a res.sendStatus stub which checks the status code it is
	# given and then finishes the test.
	expectStatus = (res, expected, done) ->
		res.sendStatus = (resCode)->
			resCode.should.equal expected
			done()

	beforeEach ->
		@settings =
			path:
				compilesDir: "/compiles/here"

		# fs is replaced by a bare object; each test stubs fs.realpath
		@fs = {}
		@ForbidSymlinks = SandboxedModule.require modulePath, requires:
			"settings-sharelatex":@settings
			"logger-sharelatex":
				log:->
				warn:->
			"fs":@fs

		# stand-in for express.static which "serves" by calling next()
		@dummyStatic = (rootDir, options) ->
			return (req, res, next) ->
				# console.log "dummyStatic serving file", rootDir, "called with", req.url
				# serve it
				next()

		@StaticServerForbidSymlinks = @ForbidSymlinks @dummyStatic, @settings.path.compilesDir
		@req =
			params:
				project_id:"12345"

		@res = {}
		@req.url = "/12345/output.pdf"

	describe "sending a normal file through", ->
		beforeEach ->
			realPath = "#{@settings.path.compilesDir}/#{@req.params.project_id}/output.pdf"
			@fs.realpath = sinon.stub().callsArgWith(1, null, realPath)

		it "should call next", (done)->
			expectStatus @res, 200, done
			@StaticServerForbidSymlinks @req, @res, done

	describe "with a missing file", ->
		beforeEach ->
			realPath = "#{@settings.path.compilesDir}/#{@req.params.project_id}/unknown.pdf"
			@fs.realpath = sinon.stub().callsArgWith(1, {code: 'ENOENT'}, realPath)

		it "should send a 404", (done)->
			expectStatus @res, 404, done
			@StaticServerForbidSymlinks @req, @res

	describe "with a symlink file", ->
		beforeEach ->
			realPath = "/etc/#{@req.params.project_id}/output.pdf"
			@fs.realpath = sinon.stub().callsArgWith(1, null, realPath)

		it "should send a 404", (done)->
			expectStatus @res, 404, done
			@StaticServerForbidSymlinks @req, @res

	describe "with an error from fs.realpath", ->
		beforeEach ->
			@fs.realpath = sinon.stub().callsArgWith(1, "error")

		it "should send a 500", (done)->
			expectStatus @res, 500, done
			@StaticServerForbidSymlinks @req, @res

View File

@@ -1,60 +0,0 @@
should = require('chai').should()
SandboxedModule = require('sandboxed-module')
assert = require('assert')
path = require('path')
sinon = require('sinon')
modulePath = path.join __dirname, "../../../app/js/SymlinkCheckerMiddlewear"
expect = require("chai").expect
describe "SymlinkCheckerMiddlewear", ->

	# Install a res.send stub which checks the status code it is given and
	# then finishes the test.
	expectStatus = (res, expected, done) ->
		res.send = (resCode)->
			resCode.should.equal expected
			done()

	beforeEach ->
		@settings =
			path:
				compilesDir: "/compiles/here"

		# fs is replaced by a bare object; each test stubs fs.realpath
		@fs = {}
		@SymlinkCheckerMiddlewear = SandboxedModule.require modulePath, requires:
			"settings-sharelatex":@settings
			"logger-sharelatex":
				log:->
				warn:->
			"fs":@fs
		@req =
			params:
				project_id:"12345"

		@res = {}
		@req.params[0]= "output.pdf"

	describe "sending a normal file through", ->
		beforeEach ->
			realPath = "#{@settings.path.compilesDir}/#{@req.params.project_id}/output.pdf"
			@fs.realpath = sinon.stub().callsArgWith(1, null, realPath)

		it "should call next", (done)->
			@SymlinkCheckerMiddlewear @req, @res, done

	describe "with a symlink file", ->
		beforeEach ->
			realPath = "/etc/#{@req.params.project_id}/output.pdf"
			@fs.realpath = sinon.stub().callsArgWith(1, null, realPath)

		it "should send a 404", (done)->
			expectStatus @res, 404, done
			@SymlinkCheckerMiddlewear @req, @res

	describe "with an error from fs.realpath", ->
		beforeEach ->
			@fs.realpath = sinon.stub().callsArgWith(1, "error")

		it "should send a 500", (done)->
			expectStatus @res, 500, done
			@SymlinkCheckerMiddlewear @req, @res