From a340d8f4bf18d20f9f59b900f8f15093cd915f5f Mon Sep 17 00:00:00 2001 From: Bram van der Veen <96aa48@gmail.com> Date: Sun, 16 Aug 2015 13:18:01 +0200 Subject: [PATCH] Finished Part II of documenting the code. --- schedule.js | 40 +++++++++++++++++++++++---------- spider.js | 64 ++++++++++++++++++++++++++++------------------------- time.js | 25 ++++++++++++++++++--- web.js | 42 ++++++++++++++++++++++++----------- 4 files changed, 113 insertions(+), 58 deletions(-) diff --git a/schedule.js b/schedule.js index e287c81..d4d2e4c 100644 --- a/schedule.js +++ b/schedule.js @@ -22,10 +22,14 @@ function get(req, res, next) { next(); }); } -//FIXME: GO ON WITH THE REST OF THE DOCUMENTATION. -//Function for getting the page via http. -function getSchedule(getUrl, callback) { - var options = url.parse(getUrl); + +/** + * Function for getting the information off of the schedule servers. + * @param {String} pageUrl - The url of the page that needs to be downloaded (not named `url`, which would shadow the url module used below). + * @param {Function} callback - Callback function to return the downloaded information. + */ +function getSchedule(pageUrl, callback) { + var options = url.parse(pageUrl); options.socksPort = config().torPort; options.socksHost = config().torHost; @@ -42,26 +46,35 @@ function getSchedule(getUrl, callback) { }); } -function scheduleTypes(page) { +/** + * Function for getting the names of the schedules with Regular Expressions + * @param {String} page - The downloaded page with schedule information. + * @return {Array} names - An array populated with the schedule names (basic, week) + */ +function scheduleNames(page) { var extract = cheerio('table tr td[valign="bottom"] table tr td b, table tr td[valign="bottom"] table tr td a', page).text().split(/\s\s/); var tab = 0; - var types = []; + var names = []; for (element of extract) { - element != '' ? types.push({ + element != '' ? 
names.push({ 'letter': element.substr(0, 1), 'value' : element.match(/.*rooster|t\/m|\d\d\s\w{3}/gi).join(' ').slice(1).toLowerCase(), 'tab': tab++ }) : null; } - return types; + return names; } -//Function for converting the page into a json dataset. +/** + * Function for converting the page (string) into a JSON datastructure. + * @param {String} page - The downloaded page with schedule information. + * @return {Object} scheduleData - The converted JSON datastructure. + */ function toJSON(page) { var result = cheerio('td:nth-child(3) table', page); - var types = scheduleTypes(page); + var names = scheduleNames(page); var isTeacher = cheerio(cheerio(page).find('tr.CoreDark').find('td')[3]).find('a').html() == null; var amountOfDays = cheerio(result).find('tr.AccentDark').find('td').length - 1; var amountOfHours = config().amountOfHours; @@ -93,7 +106,7 @@ function toJSON(page) { } } - scheduleData.types = types; + scheduleData.names = names; return scheduleData; } @@ -109,4 +122,7 @@ function api(lookup, callback) { } //Exporting the schedule function. -module.exports = {'get': get, 'api': api}; +module.exports = { + 'get': get, + 'api': api +}; diff --git a/spider.js b/spider.js index 48368e1..e7eee97 100644 --- a/spider.js +++ b/spider.js @@ -1,20 +1,31 @@ +//spider.js + +//Import first-party modules. +var url = require('url'); + +//Import third-party modules var http = require('socks5-http-client'); var cheerio = require('cheerio'); var iconv = require('iconv-lite'); + +//Import self-written modules. var config = require('./configuration'); -var url = require('url'); var database = require('./database')(); +//Define local variables. var scheduletypes = [ 'Klasrooster', 'Docentrooster', 'Leerlingrooster', 'Lokaalrooster' ]; -var schoolID; +var schoolID = config().schoolID; -//Function for getting pages with http requests. -function get() { +/** + * Function for crawling the schedule site for data such as: students, teachers + * chambers and groups. 
+ */ +function crawl() { database.collection('index').drop(); for (scheduletype of scheduletypes) { @@ -43,24 +54,30 @@ function get() { } } -//Function for extracting the lists with useful information from the crawled pages. -//(e.g Student names/ids, Teacher codes, Chamber numbers) +/** + * Function for extracting the lists with useful information from the crawled pages. + * (e.g. Student names/ids, Teacher codes, Chamber numbers) + * @param {String} page - A string containing a downloaded schedule page. + */ function extract(page) { var array = cheerio('select', page).text().split('\n'); return array.splice(1, array.length - 2); } -//Function for ripping all of the information -function rip(data) { - var list = extract(data.data); +/** + * Function for ripping all possible information from a page. + * @param {Object} page - An object holding the downloaded schedule page in `data` and the schedule type in `type`. + */ +function rip(page) { + var list = extract(page.data); var collection = database.collection('index'); - if (data.type == 'Leerlingrooster') { + if (page.type == 'Leerlingrooster') { for(studentcategory of list) { (function (studentcategory) { - var options = url.parse('http://roosters5.gepro-osi.nl/roosters/rooster.php?school=' + schoolID + '&type=' + data.type + '&afdeling=' + studentcategory); + var options = url.parse('http://roosters5.gepro-osi.nl/roosters/rooster.php?school=' + schoolID + '&type=' + page.type + '&afdeling=' + studentcategory); options.socksPort = config().torPort; options.socksHost = config().torHost; @@ -89,10 +106,10 @@ function rip(data) { 'first_name' : name.split(' ')[0], 'last_name' : name.split(' ').splice(1).join(' '), 'studentcategory' : studentcategory, - 'type' : data.type.replace(/rooster/g, '').toLowerCase() + 'type' : page.type.replace(/rooster/g, '').toLowerCase() } - collection.insert(databaseEntry, showOutput); + collection.insert(databaseEntry); if (studentcategory == list[list.length - 1] && student == listOfStudents.length - 1) { setTimeout(function 
() { @@ -111,33 +128,20 @@ function rip(data) { for (entry of list) { var databaseEntry = { 'name' : entry, - 'type' : data.type.replace(/rooster/g, '').toLowerCase() + 'type' : page.type.replace(/rooster/g, '').toLowerCase() } - collection.insert(databaseEntry, showOutput); + collection.insert(databaseEntry); } } } -//Function being called to access functionality from this module. -function crawl() { - schoolID = config().schoolID; - get(); -} - - -//Redundant function for draining native-mongodb-driver output -function showOutput(error, message) { - if (process.argv[3] == '-v') { - // if (error) process.stdout.write(error.toString()); - if (message != null) console.log(message); - } -} - +//Exporting functions as a module. module.exports = { 'crawl' : crawl } +//Testing/ripping command to be used from cli. if (process.argv[2] == 'test' || process.argv[2] == 'rip') { module.exports.crawl(934); } diff --git a/time.js b/time.js index 7fde237..ddcad17 100644 --- a/time.js +++ b/time.js @@ -1,13 +1,22 @@ //time.js + +//Importing self-written modules. var config = require('./configuration'); -//Function for getting the time, with minutes as a fracture. +/** + * Function for getting the time, with minutes as a fraction of the hour. + * @return {Number} time - The time in fractional hours (20.5 instead of 20:30) + */ function get() { var time = new Date(); return time.getHours() + (time.getMinutes() / 60); } -//Function for convert hh:mm to fractured time (hh.hh) +/** + * Function for converting hh:mm to fractional time (hh.hh) + * @param {String} timestr - A string containing a time from and to (e.g. "9:15 - 10:00") + * @return {Array} array - An array containing the time string split in two. + */ function parse(timestr) { var parsed = timestr.match(/\d{1,2}:\d+/g); var array = []; @@ -19,12 +28,21 @@ function parse(timestr) { return array; } -//Function for parsing and checking if the currrent time is within the parsed string. 
+/** + * Function for parsing and checking if the current time is within the parsed string. + * @param {String} timespan - A string containing a time from and to (e.g. "9:15 - 10:00") + * @return {Boolean} - Returns true if the current time is within the timespan or false when it's not. + */ function withinTimespan(timespan) { if (get() > parse(timespan)[0] && get() < parse(timespan)[1]) return true; else return false; } +/** + * Function that uses withinTimespan() to determine if the current time is + * within the earliest and the latest time strings. + * @return {Boolean} - Returns true if the current time is within the timespan or false when it's not. + */ function duringSchool() { var start = parse(config().times[0])[0]; var end = parse(config().times[config().times.length - 1])[1]; @@ -33,6 +51,7 @@ function duringSchool() { else return false; } +//Export the functions as a module. module.exports = { 'get': get, 'withinTimespan': withinTimespan, diff --git a/web.js b/web.js index 71b37a8..7b0c0ee 100644 --- a/web.js +++ b/web.js @@ -1,9 +1,14 @@ //web.js + +//Import first-party modules. +var fs = require('fs'); + +//Import third-party modules. var express = require('express'); var less = require('express-less'); var body_parser = require('body-parser'); -var fs = require('fs'); +//Import self-written modules. var api = require('./api'); var config = require('./configuration'); var lookup = require('./lookup'); @@ -12,6 +17,7 @@ var auth = require('./auth'); var redirecter = require('./redirecter'); var time = require('./time'); +//Setting local variables. var app = express(); //Set up jade rendering engine. @@ -29,37 +35,47 @@ app.use('/css', less(__dirname + '/resources/less')); app.use('/js', express.static(__dirname + '/resources/js')); app.use('/other', express.static(__dirname + '/resources/other')); +//Initialising homepage. 
app.get('/', auth.is, function (req, res) { res.render('homepage', req); }); +//Initialize redirector when information is posted to the root of the website. app.post('/', redirecter); -app.get('/login', function (req, res) { - res.render('login', req); -}); - -app.post('/login', auth.login); -app.get('/logout', auth.logout); - -app.get('/api/:api', function (req, res, next) { next(); }); -app.param('api', api); - +//Initialising behavior for searching. app.param('search', lookup.get); app.get('/rooster/:search', [auth.is, schedule.get, function (req, res) { res.render('schedule', req); }]); +//Initialising behavior for searching through lists. app.param('list', lookup.list); app.get('/klassenlijst/:list',[auth.is, function (req, res) { res.render('list', req); }]); -app.listen(config().webPort); -plugins(); +//Initialising login page frontend. +app.get('/login', function (req, res) { + res.render('login', req); +}); +//Initialising login/logout behavior. +app.post('/login', auth.login); +app.get('/logout', auth.logout); + +//Initialising API handler. +app.get('/api/:api', function (req, res, next) { next(); }); +app.param('api', api); + +//Initialize the server on configured web port. +app.listen(config().webPort); + +/** + * Function for initialising all of the plugins in the plugins/ directory. + */ function plugins() { var pluginsDirectory = fs.readdirSync(__dirname + '/plugins');