Finished Part II of documenting the code.
This commit is contained in:
parent
27c16114d5
commit
a340d8f4bf
40
schedule.js
40
schedule.js
|
@ -22,10 +22,14 @@ function get(req, res, next) {
|
||||||
next();
|
next();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
//FIXME: GO ON WITH THE REST OF THE DOCUMENTATION.
|
|
||||||
//Function for getting the page via http.
|
/**
|
||||||
function getSchedule(getUrl, callback) {
|
* Function for getting the information off of the schedule servers.
|
||||||
var options = url.parse(getUrl);
|
* @param {String} url - The url of the page that needs to be downloaded.
|
||||||
|
* @param {Function} callback - Callback function to return the downloaded information.
|
||||||
|
*/
|
||||||
|
function getSchedule(url, callback) {
|
||||||
|
var options = url.parse(url);
|
||||||
options.socksPort = config().torPort;
|
options.socksPort = config().torPort;
|
||||||
options.socksHost = config().torHost;
|
options.socksHost = config().torHost;
|
||||||
|
|
||||||
|
@ -42,26 +46,35 @@ function getSchedule(getUrl, callback) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scheduleTypes(page) {
|
/**
|
||||||
|
* Function for getting the names of the schedules with Regular Expressions
|
||||||
|
* @param {String} page - The downloaded page with schedule information.
|
||||||
|
* @return {Array} names - An array populated with the schedule names (basic, week)
|
||||||
|
*/
|
||||||
|
function scheduleNames(page) {
|
||||||
var extract = cheerio('table tr td[valign="bottom"] table tr td b, table tr td[valign="bottom"] table tr td a', page).text().split(/\s\s/);
|
var extract = cheerio('table tr td[valign="bottom"] table tr td b, table tr td[valign="bottom"] table tr td a', page).text().split(/\s\s/);
|
||||||
var tab = 0;
|
var tab = 0;
|
||||||
var types = [];
|
var names = [];
|
||||||
|
|
||||||
for (element of extract) {
|
for (element of extract) {
|
||||||
element != '' ? types.push({
|
element != '' ? names.push({
|
||||||
'letter': element.substr(0, 1),
|
'letter': element.substr(0, 1),
|
||||||
'value' : element.match(/.*rooster|t\/m|\d\d\s\w{3}/gi).join(' ').slice(1).toLowerCase(),
|
'value' : element.match(/.*rooster|t\/m|\d\d\s\w{3}/gi).join(' ').slice(1).toLowerCase(),
|
||||||
'tab': tab++
|
'tab': tab++
|
||||||
}) : null;
|
}) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return types;
|
return names;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Function for converting the page into a json dataset.
|
/**
|
||||||
|
* Function for converting the page (string) into a JSON datastructure.
|
||||||
|
* @param {String} page - The downloaded page with schedule information.
|
||||||
|
* @return {Object} scheduleData - The converted JSON datastructure.
|
||||||
|
*/
|
||||||
function toJSON(page) {
|
function toJSON(page) {
|
||||||
var result = cheerio('td:nth-child(3) table', page);
|
var result = cheerio('td:nth-child(3) table', page);
|
||||||
var types = scheduleTypes(page);
|
var names = scheduleNames(page);
|
||||||
var isTeacher = cheerio(cheerio(page).find('tr.CoreDark').find('td')[3]).find('a').html() == null;
|
var isTeacher = cheerio(cheerio(page).find('tr.CoreDark').find('td')[3]).find('a').html() == null;
|
||||||
var amountOfDays = cheerio(result).find('tr.AccentDark').find('td').length - 1;
|
var amountOfDays = cheerio(result).find('tr.AccentDark').find('td').length - 1;
|
||||||
var amountOfHours = config().amountOfHours;
|
var amountOfHours = config().amountOfHours;
|
||||||
|
@ -93,7 +106,7 @@ function toJSON(page) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
scheduleData.types = types;
|
scheduleData.names = names;
|
||||||
|
|
||||||
return scheduleData;
|
return scheduleData;
|
||||||
}
|
}
|
||||||
|
@ -109,4 +122,7 @@ function api(lookup, callback) {
|
||||||
}
|
}
|
||||||
|
|
||||||
//Exporting the schedule function.
|
//Exporting the schedule function.
|
||||||
module.exports = {'get': get, 'api': api};
|
module.exports = {
|
||||||
|
'get': get,
|
||||||
|
'api': api
|
||||||
|
};
|
||||||
|
|
64
spider.js
64
spider.js
|
@ -1,20 +1,31 @@
|
||||||
|
//spider.js
|
||||||
|
|
||||||
|
//Import first-party modules.
|
||||||
|
var url = require('url');
|
||||||
|
|
||||||
|
//Import third-party modules
|
||||||
var http = require('socks5-http-client');
|
var http = require('socks5-http-client');
|
||||||
var cheerio = require('cheerio');
|
var cheerio = require('cheerio');
|
||||||
var iconv = require('iconv-lite');
|
var iconv = require('iconv-lite');
|
||||||
|
|
||||||
|
//Import self-written modules.
|
||||||
var config = require('./configuration');
|
var config = require('./configuration');
|
||||||
var url = require('url');
|
|
||||||
var database = require('./database')();
|
var database = require('./database')();
|
||||||
|
|
||||||
|
//Define local variables.
|
||||||
var scheduletypes = [
|
var scheduletypes = [
|
||||||
'Klasrooster',
|
'Klasrooster',
|
||||||
'Docentrooster',
|
'Docentrooster',
|
||||||
'Leerlingrooster',
|
'Leerlingrooster',
|
||||||
'Lokaalrooster'
|
'Lokaalrooster'
|
||||||
];
|
];
|
||||||
var schoolID;
|
var schoolID = config().schoolID;
|
||||||
|
|
||||||
//Function for getting pages with http requests.
|
/**
|
||||||
function get() {
|
* Function for crawling the schedule site for data such as: students, teachers
|
||||||
|
* chambers and groups.
|
||||||
|
*/
|
||||||
|
function crawl() {
|
||||||
database.collection('index').drop();
|
database.collection('index').drop();
|
||||||
|
|
||||||
for (scheduletype of scheduletypes) {
|
for (scheduletype of scheduletypes) {
|
||||||
|
@ -43,24 +54,30 @@ function get() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//Function for extracting the lists with useful information from the crawled pages.
|
/**
|
||||||
//(e.g Student names/ids, Teacher codes, Chamber numbers)
|
* Function for extracting the lists with useful information from the crawled pages.
|
||||||
|
* (e.g Student names/ids, Teacher codes, Chamber numbers)
|
||||||
|
* @param {String} page - A string containing a downloaded schedule page.
|
||||||
|
*/
|
||||||
function extract(page) {
|
function extract(page) {
|
||||||
var array = cheerio('select', page).text().split('\n');
|
var array = cheerio('select', page).text().split('\n');
|
||||||
return array.splice(1, array.length - 2);
|
return array.splice(1, array.length - 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Function for ripping all of the information
|
/**
|
||||||
function rip(data) {
|
* Function for ripping all possible information from a page.
|
||||||
var list = extract(data.data);
|
* @param {String} page - A string containing a downloaded schedule page.
|
||||||
|
*/
|
||||||
|
function rip(page) {
|
||||||
|
var list = extract(page.data);
|
||||||
var collection = database.collection('index');
|
var collection = database.collection('index');
|
||||||
|
|
||||||
if (data.type == 'Leerlingrooster') {
|
if (page.type == 'Leerlingrooster') {
|
||||||
|
|
||||||
for(studentcategory of list) {
|
for(studentcategory of list) {
|
||||||
|
|
||||||
(function (studentcategory) {
|
(function (studentcategory) {
|
||||||
var options = url.parse('http://roosters5.gepro-osi.nl/roosters/rooster.php?school=' + schoolID + '&type=' + data.type + '&afdeling=' + studentcategory);
|
var options = url.parse('http://roosters5.gepro-osi.nl/roosters/rooster.php?school=' + schoolID + '&type=' + page.type + '&afdeling=' + studentcategory);
|
||||||
options.socksPort = config().torPort;
|
options.socksPort = config().torPort;
|
||||||
options.socksHost = config().torHost;
|
options.socksHost = config().torHost;
|
||||||
|
|
||||||
|
@ -89,10 +106,10 @@ function rip(data) {
|
||||||
'first_name' : name.split(' ')[0],
|
'first_name' : name.split(' ')[0],
|
||||||
'last_name' : name.split(' ').splice(1).join(' '),
|
'last_name' : name.split(' ').splice(1).join(' '),
|
||||||
'studentcategory' : studentcategory,
|
'studentcategory' : studentcategory,
|
||||||
'type' : data.type.replace(/rooster/g, '').toLowerCase()
|
'type' : page.type.replace(/rooster/g, '').toLowerCase()
|
||||||
}
|
}
|
||||||
|
|
||||||
collection.insert(databaseEntry, showOutput);
|
collection.insert(databaseEntry);
|
||||||
|
|
||||||
if (studentcategory == list[list.length - 1] && student == listOfStudents.length - 1) {
|
if (studentcategory == list[list.length - 1] && student == listOfStudents.length - 1) {
|
||||||
setTimeout(function () {
|
setTimeout(function () {
|
||||||
|
@ -111,33 +128,20 @@ function rip(data) {
|
||||||
for (entry of list) {
|
for (entry of list) {
|
||||||
var databaseEntry = {
|
var databaseEntry = {
|
||||||
'name' : entry,
|
'name' : entry,
|
||||||
'type' : data.type.replace(/rooster/g, '').toLowerCase()
|
'type' : page.type.replace(/rooster/g, '').toLowerCase()
|
||||||
}
|
}
|
||||||
|
|
||||||
collection.insert(databaseEntry, showOutput);
|
collection.insert(databaseEntry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//Function being called to access functionality from this module.
|
//Exporting functions as a module.
|
||||||
function crawl() {
|
|
||||||
schoolID = config().schoolID;
|
|
||||||
get();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//Redundant function for draining native-mongodb-driver output
|
|
||||||
function showOutput(error, message) {
|
|
||||||
if (process.argv[3] == '-v') {
|
|
||||||
// if (error) process.stdout.write(error.toString());
|
|
||||||
if (message != null) console.log(message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
'crawl' : crawl
|
'crawl' : crawl
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Testing/ripping command to be used from cli.
|
||||||
if (process.argv[2] == 'test' || process.argv[2] == 'rip') {
|
if (process.argv[2] == 'test' || process.argv[2] == 'rip') {
|
||||||
module.exports.crawl(934);
|
module.exports.crawl(934);
|
||||||
}
|
}
|
||||||
|
|
25
time.js
25
time.js
|
@ -1,13 +1,22 @@
|
||||||
//time.js
|
//time.js
|
||||||
|
|
||||||
|
//Importing self-written modules.
|
||||||
var config = require('./configuration');
|
var config = require('./configuration');
|
||||||
|
|
||||||
//Function for getting the time, with minutes as a fracture.
|
/**
|
||||||
|
* Function for getting the current time, with the minutes expressed as a fraction of an hour.
|
||||||
|
* @return {Number} time - The time in fractional hours (20.5 instead of 20:30)
|
||||||
|
*/
|
||||||
function get() {
|
function get() {
|
||||||
var time = new Date();
|
var time = new Date();
|
||||||
return time.getHours() + (time.getMinutes() / 60);
|
return time.getHours() + (time.getMinutes() / 60);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Function for convert hh:mm to fractured time (hh.hh)
|
/**
|
||||||
|
* Function for converting hh:mm to fractional time (hh.hh)
|
||||||
|
* @param {String} timestr - A string containing a time from and to (e.g "9:15 - 10:00")
|
||||||
|
* @return {Array} array - An array containing the time string split in two.
|
||||||
|
*/
|
||||||
function parse(timestr) {
|
function parse(timestr) {
|
||||||
var parsed = timestr.match(/\d{1,2}:\d+/g);
|
var parsed = timestr.match(/\d{1,2}:\d+/g);
|
||||||
var array = [];
|
var array = [];
|
||||||
|
@ -19,12 +28,21 @@ function parse(timestr) {
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Function for parsing and checking if the currrent time is within the parsed string.
|
/**
|
||||||
|
* Function for parsing and checking if the current time is within the parsed string.
|
||||||
|
* @param {String} timespan - A string containing a time from and to (e.g "9:15 - 10:00")
|
||||||
|
* @return {Boolean} - Returns true if the current time is within the timespan or false when it's not.
|
||||||
|
*/
|
||||||
function withinTimespan(timespan) {
|
function withinTimespan(timespan) {
|
||||||
if (get() > parse(timespan)[0] && get() < parse(timespan)[1]) return true;
|
if (get() > parse(timespan)[0] && get() < parse(timespan)[1]) return true;
|
||||||
else return false;
|
else return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function that uses withinTimespan() to determine if the current time is
|
||||||
|
* within the earliest and the latest time strings.
|
||||||
|
* @return {Boolean} - Returns true if the current time is within the timespan or false when it's not.
|
||||||
|
*/
|
||||||
function duringSchool() {
|
function duringSchool() {
|
||||||
var start = parse(config().times[0])[0];
|
var start = parse(config().times[0])[0];
|
||||||
var end = parse(config().times[config().times.length - 1])[1];
|
var end = parse(config().times[config().times.length - 1])[1];
|
||||||
|
@ -33,6 +51,7 @@ function duringSchool() {
|
||||||
else return false;
|
else return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Export the functions as a module.
|
||||||
module.exports = {
|
module.exports = {
|
||||||
'get': get,
|
'get': get,
|
||||||
'withinTimespan': withinTimespan,
|
'withinTimespan': withinTimespan,
|
||||||
|
|
42
web.js
42
web.js
|
@ -1,9 +1,14 @@
|
||||||
//web.js
|
//web.js
|
||||||
|
|
||||||
|
//Import first-party modules.
|
||||||
|
var fs = require('fs');
|
||||||
|
|
||||||
|
//Import third-party modules.
|
||||||
var express = require('express');
|
var express = require('express');
|
||||||
var less = require('express-less');
|
var less = require('express-less');
|
||||||
var body_parser = require('body-parser');
|
var body_parser = require('body-parser');
|
||||||
var fs = require('fs');
|
|
||||||
|
|
||||||
|
//Import self-written modules.
|
||||||
var api = require('./api');
|
var api = require('./api');
|
||||||
var config = require('./configuration');
|
var config = require('./configuration');
|
||||||
var lookup = require('./lookup');
|
var lookup = require('./lookup');
|
||||||
|
@ -12,6 +17,7 @@ var auth = require('./auth');
|
||||||
var redirecter = require('./redirecter');
|
var redirecter = require('./redirecter');
|
||||||
var time = require('./time');
|
var time = require('./time');
|
||||||
|
|
||||||
|
//Setting local variables.
|
||||||
var app = express();
|
var app = express();
|
||||||
|
|
||||||
//Set up jade rendering engine.
|
//Set up jade rendering engine.
|
||||||
|
@ -29,37 +35,47 @@ app.use('/css', less(__dirname + '/resources/less'));
|
||||||
app.use('/js', express.static(__dirname + '/resources/js'));
|
app.use('/js', express.static(__dirname + '/resources/js'));
|
||||||
app.use('/other', express.static(__dirname + '/resources/other'));
|
app.use('/other', express.static(__dirname + '/resources/other'));
|
||||||
|
|
||||||
|
//Initialising homepage.
|
||||||
app.get('/', auth.is, function (req, res) {
|
app.get('/', auth.is, function (req, res) {
|
||||||
res.render('homepage', req);
|
res.render('homepage', req);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
//Initialize redirector when information is posted to the root of the website.
|
||||||
app.post('/', redirecter);
|
app.post('/', redirecter);
|
||||||
|
|
||||||
app.get('/login', function (req, res) {
|
//Initialising behavior for searching.
|
||||||
res.render('login', req);
|
|
||||||
});
|
|
||||||
|
|
||||||
app.post('/login', auth.login);
|
|
||||||
app.get('/logout', auth.logout);
|
|
||||||
|
|
||||||
app.get('/api/:api', function (req, res, next) { next(); });
|
|
||||||
app.param('api', api);
|
|
||||||
|
|
||||||
app.param('search', lookup.get);
|
app.param('search', lookup.get);
|
||||||
|
|
||||||
app.get('/rooster/:search', [auth.is, schedule.get, function (req, res) {
|
app.get('/rooster/:search', [auth.is, schedule.get, function (req, res) {
|
||||||
res.render('schedule', req);
|
res.render('schedule', req);
|
||||||
}]);
|
}]);
|
||||||
|
|
||||||
|
//Initialising behavior for searching through lists.
|
||||||
app.param('list', lookup.list);
|
app.param('list', lookup.list);
|
||||||
|
|
||||||
app.get('/klassenlijst/:list',[auth.is, function (req, res) {
|
app.get('/klassenlijst/:list',[auth.is, function (req, res) {
|
||||||
res.render('list', req);
|
res.render('list', req);
|
||||||
}]);
|
}]);
|
||||||
|
|
||||||
app.listen(config().webPort);
|
//Initialising login page frontend.
|
||||||
plugins();
|
app.get('/login', function (req, res) {
|
||||||
|
res.render('login', req);
|
||||||
|
});
|
||||||
|
|
||||||
|
//Initialising login/logout behavior.
|
||||||
|
app.post('/login', auth.login);
|
||||||
|
app.get('/logout', auth.logout);
|
||||||
|
|
||||||
|
//Initialising API handler.
|
||||||
|
app.get('/api/:api', function (req, res, next) { next(); });
|
||||||
|
app.param('api', api);
|
||||||
|
|
||||||
|
//Initialize the server on configured web port.
|
||||||
|
app.listen(config().webPort);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function for initialising all of the plugins in the plugins/ directory.
|
||||||
|
*/
|
||||||
function plugins() {
|
function plugins() {
|
||||||
var pluginsDirectory = fs.readdirSync(__dirname + '/plugins');
|
var pluginsDirectory = fs.readdirSync(__dirname + '/plugins');
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue