Finished Part II of documenting the code.

This commit is contained in:
Bram van der Veen 2015-08-16 13:18:01 +02:00
parent 27c16114d5
commit a340d8f4bf
4 changed files with 113 additions and 58 deletions

View file

@ -22,10 +22,14 @@ function get(req, res, next) {
next(); next();
}); });
} }
//FIXME: GO ON WITH THE REST OF THE DOCUMENTATION.
//Function for getting the page via http. /**
function getSchedule(getUrl, callback) { * Function for getting the information off of the schedule servers.
var options = url.parse(getUrl); * @param {String} url - The url of the page that needs to be downloaded.
* @param {Function} callback - Callback function to return the downloaded information.
*/
function getSchedule(url, callback) {
var options = url.parse(url);
options.socksPort = config().torPort; options.socksPort = config().torPort;
options.socksHost = config().torHost; options.socksHost = config().torHost;
@ -42,26 +46,35 @@ function getSchedule(getUrl, callback) {
}); });
} }
function scheduleTypes(page) { /**
* Function for getting the names of the schedules with Regular Expressions
* @param {String} page - The downloaded page with schedule information.
* @return {Array} names - An array populated with the schedule names (basic, week)
*/
function scheduleNames(page) {
var extract = cheerio('table tr td[valign="bottom"] table tr td b, table tr td[valign="bottom"] table tr td a', page).text().split(/\s\s/); var extract = cheerio('table tr td[valign="bottom"] table tr td b, table tr td[valign="bottom"] table tr td a', page).text().split(/\s\s/);
var tab = 0; var tab = 0;
var types = []; var names = [];
for (element of extract) { for (element of extract) {
element != '' ? types.push({ element != '' ? names.push({
'letter': element.substr(0, 1), 'letter': element.substr(0, 1),
'value' : element.match(/.*rooster|t\/m|\d\d\s\w{3}/gi).join(' ').slice(1).toLowerCase(), 'value' : element.match(/.*rooster|t\/m|\d\d\s\w{3}/gi).join(' ').slice(1).toLowerCase(),
'tab': tab++ 'tab': tab++
}) : null; }) : null;
} }
return types; return names;
} }
//Function for converting the page into a json dataset. /**
* Function for converting the page (string) into a JSON data structure.
* @param {String} page - The downloaded page with schedule information.
* @return {Object} scheduleData - The converted JSON data structure.
*/
function toJSON(page) { function toJSON(page) {
var result = cheerio('td:nth-child(3) table', page); var result = cheerio('td:nth-child(3) table', page);
var types = scheduleTypes(page); var names = scheduleNames(page);
var isTeacher = cheerio(cheerio(page).find('tr.CoreDark').find('td')[3]).find('a').html() == null; var isTeacher = cheerio(cheerio(page).find('tr.CoreDark').find('td')[3]).find('a').html() == null;
var amountOfDays = cheerio(result).find('tr.AccentDark').find('td').length - 1; var amountOfDays = cheerio(result).find('tr.AccentDark').find('td').length - 1;
var amountOfHours = config().amountOfHours; var amountOfHours = config().amountOfHours;
@ -93,7 +106,7 @@ function toJSON(page) {
} }
} }
scheduleData.types = types; scheduleData.names = names;
return scheduleData; return scheduleData;
} }
@ -109,4 +122,7 @@ function api(lookup, callback) {
} }
//Exporting the schedule function. //Exporting the schedule function.
module.exports = {'get': get, 'api': api}; module.exports = {
'get': get,
'api': api
};

View file

@ -1,20 +1,31 @@
//spider.js
//Import first-party modules.
var url = require('url');
//Import third-party modules
var http = require('socks5-http-client'); var http = require('socks5-http-client');
var cheerio = require('cheerio'); var cheerio = require('cheerio');
var iconv = require('iconv-lite'); var iconv = require('iconv-lite');
//Import self-written modules.
var config = require('./configuration'); var config = require('./configuration');
var url = require('url');
var database = require('./database')(); var database = require('./database')();
//Define local variables.
var scheduletypes = [ var scheduletypes = [
'Klasrooster', 'Klasrooster',
'Docentrooster', 'Docentrooster',
'Leerlingrooster', 'Leerlingrooster',
'Lokaalrooster' 'Lokaalrooster'
]; ];
var schoolID; var schoolID = config().schoolID;
//Function for getting pages with http requests. /**
function get() { * Function for crawling the schedule site for data such as: students, teachers
* chambers and groups.
*/
function crawl() {
database.collection('index').drop(); database.collection('index').drop();
for (scheduletype of scheduletypes) { for (scheduletype of scheduletypes) {
@ -43,24 +54,30 @@ function get() {
} }
} }
//Function for extracting the lists with useful information from the crawled pages. /**
//(e.g Student names/ids, Teacher codes, Chamber numbers) * Function for extracting the lists with useful information from the crawled pages.
* (e.g Student names/ids, Teacher codes, Chamber numbers)
* @param {String} page - A string containing a downloaded schedule page.
*/
function extract(page) { function extract(page) {
var array = cheerio('select', page).text().split('\n'); var array = cheerio('select', page).text().split('\n');
return array.splice(1, array.length - 2); return array.splice(1, array.length - 2);
} }
//Function for ripping all of the information /**
function rip(data) { * Function for ripping all possible information from a page.
var list = extract(data.data); * @param {String} page - A string containing a downloaded schedule page.
*/
function rip(page) {
var list = extract(page.data);
var collection = database.collection('index'); var collection = database.collection('index');
if (data.type == 'Leerlingrooster') { if (page.type == 'Leerlingrooster') {
for(studentcategory of list) { for(studentcategory of list) {
(function (studentcategory) { (function (studentcategory) {
var options = url.parse('http://roosters5.gepro-osi.nl/roosters/rooster.php?school=' + schoolID + '&type=' + data.type + '&afdeling=' + studentcategory); var options = url.parse('http://roosters5.gepro-osi.nl/roosters/rooster.php?school=' + schoolID + '&type=' + page.type + '&afdeling=' + studentcategory);
options.socksPort = config().torPort; options.socksPort = config().torPort;
options.socksHost = config().torHost; options.socksHost = config().torHost;
@ -89,10 +106,10 @@ function rip(data) {
'first_name' : name.split(' ')[0], 'first_name' : name.split(' ')[0],
'last_name' : name.split(' ').splice(1).join(' '), 'last_name' : name.split(' ').splice(1).join(' '),
'studentcategory' : studentcategory, 'studentcategory' : studentcategory,
'type' : data.type.replace(/rooster/g, '').toLowerCase() 'type' : page.type.replace(/rooster/g, '').toLowerCase()
} }
collection.insert(databaseEntry, showOutput); collection.insert(databaseEntry);
if (studentcategory == list[list.length - 1] && student == listOfStudents.length - 1) { if (studentcategory == list[list.length - 1] && student == listOfStudents.length - 1) {
setTimeout(function () { setTimeout(function () {
@ -111,33 +128,20 @@ function rip(data) {
for (entry of list) { for (entry of list) {
var databaseEntry = { var databaseEntry = {
'name' : entry, 'name' : entry,
'type' : data.type.replace(/rooster/g, '').toLowerCase() 'type' : page.type.replace(/rooster/g, '').toLowerCase()
} }
collection.insert(databaseEntry, showOutput); collection.insert(databaseEntry);
} }
} }
} }
//Function being called to access functionality from this module. //Exporting functions as a module.
function crawl() {
schoolID = config().schoolID;
get();
}
//Redundant function for draining native-mongodb-driver output
function showOutput(error, message) {
if (process.argv[3] == '-v') {
// if (error) process.stdout.write(error.toString());
if (message != null) console.log(message);
}
}
module.exports = { module.exports = {
'crawl' : crawl 'crawl' : crawl
} }
//Testing/ripping command to be used from cli.
if (process.argv[2] == 'test' || process.argv[2] == 'rip') { if (process.argv[2] == 'test' || process.argv[2] == 'rip') {
module.exports.crawl(934); module.exports.crawl(934);
} }

25
time.js
View file

@ -1,13 +1,22 @@
//time.js //time.js
//Importing self-written modules.
var config = require('./configuration'); var config = require('./configuration');
//Function for getting the time, with minutes as a fracture. /**
* Function for getting the time, with minutes as a fraction of an hour.
* @return {Float} time - The time as a decimal number of hours (20.5 instead of 20:30)
*/
function get() { function get() {
var time = new Date(); var time = new Date();
return time.getHours() + (time.getMinutes() / 60); return time.getHours() + (time.getMinutes() / 60);
} }
//Function for convert hh:mm to fractured time (hh.hh) /**
* Function for converting hh:mm to fractional time (hh.mm)
* @param {String} timestr - A string containing a time from and to (e.g "9:15 - 10:00")
* @return {Array} array - An array containing the time string split in two.
*/
function parse(timestr) { function parse(timestr) {
var parsed = timestr.match(/\d{1,2}:\d+/g); var parsed = timestr.match(/\d{1,2}:\d+/g);
var array = []; var array = [];
@ -19,12 +28,21 @@ function parse(timestr) {
return array; return array;
} }
//Function for parsing and checking if the currrent time is within the parsed string. /**
* Function for parsing a timespan string and checking if the current time is within it.
* @param {String} timespan - A string containing a time from and to (e.g "9:15 - 10:00")
* @return {Boolean} - Returns true if the current time is within the timespan or false when it's not.
*/
function withinTimespan(timespan) { function withinTimespan(timespan) {
if (get() > parse(timespan)[0] && get() < parse(timespan)[1]) return true; if (get() > parse(timespan)[0] && get() < parse(timespan)[1]) return true;
else return false; else return false;
} }
/**
* Function that uses withinTimespan() to determine if the current time is
* within the earliest and the latest time strings.
* @return {Boolean} - Returns true if the current time is within the timespan or false when it's not.
*/
function duringSchool() { function duringSchool() {
var start = parse(config().times[0])[0]; var start = parse(config().times[0])[0];
var end = parse(config().times[config().times.length - 1])[1]; var end = parse(config().times[config().times.length - 1])[1];
@ -33,6 +51,7 @@ function duringSchool() {
else return false; else return false;
} }
//Export the functions as a module.
module.exports = { module.exports = {
'get': get, 'get': get,
'withinTimespan': withinTimespan, 'withinTimespan': withinTimespan,

42
web.js
View file

@ -1,9 +1,14 @@
//web.js //web.js
//Import first-party modules.
var fs = require('fs');
//Import third-party modules.
var express = require('express'); var express = require('express');
var less = require('express-less'); var less = require('express-less');
var body_parser = require('body-parser'); var body_parser = require('body-parser');
var fs = require('fs');
//Import self-written modules.
var api = require('./api'); var api = require('./api');
var config = require('./configuration'); var config = require('./configuration');
var lookup = require('./lookup'); var lookup = require('./lookup');
@ -12,6 +17,7 @@ var auth = require('./auth');
var redirecter = require('./redirecter'); var redirecter = require('./redirecter');
var time = require('./time'); var time = require('./time');
//Setting local variables.
var app = express(); var app = express();
//Set up jade rendering engine. //Set up jade rendering engine.
@ -29,37 +35,47 @@ app.use('/css', less(__dirname + '/resources/less'));
app.use('/js', express.static(__dirname + '/resources/js')); app.use('/js', express.static(__dirname + '/resources/js'));
app.use('/other', express.static(__dirname + '/resources/other')); app.use('/other', express.static(__dirname + '/resources/other'));
//Initialising homepage.
app.get('/', auth.is, function (req, res) { app.get('/', auth.is, function (req, res) {
res.render('homepage', req); res.render('homepage', req);
}); });
//Initialize redirector when information is posted to the root of the website.
app.post('/', redirecter); app.post('/', redirecter);
app.get('/login', function (req, res) { //Initialising behavior for searching.
res.render('login', req);
});
app.post('/login', auth.login);
app.get('/logout', auth.logout);
app.get('/api/:api', function (req, res, next) { next(); });
app.param('api', api);
app.param('search', lookup.get); app.param('search', lookup.get);
app.get('/rooster/:search', [auth.is, schedule.get, function (req, res) { app.get('/rooster/:search', [auth.is, schedule.get, function (req, res) {
res.render('schedule', req); res.render('schedule', req);
}]); }]);
//Initialising behavior for searching through lists.
app.param('list', lookup.list); app.param('list', lookup.list);
app.get('/klassenlijst/:list',[auth.is, function (req, res) { app.get('/klassenlijst/:list',[auth.is, function (req, res) {
res.render('list', req); res.render('list', req);
}]); }]);
app.listen(config().webPort); //Initialising login page frontend.
plugins(); app.get('/login', function (req, res) {
res.render('login', req);
});
//Initialising login/logout behavior.
app.post('/login', auth.login);
app.get('/logout', auth.logout);
//Initialising API handler.
app.get('/api/:api', function (req, res, next) { next(); });
app.param('api', api);
//Initialize the server on configured web port.
app.listen(config().webPort);
/**
* Function for initialising all of the plugins in the plugins/ directory.
*/
function plugins() { function plugins() {
var pluginsDirectory = fs.readdirSync(__dirname + '/plugins'); var pluginsDirectory = fs.readdirSync(__dirname + '/plugins');