Added an end to the script (to close the database).

This commit is contained in:
Bram van der Veen 2015-06-07 22:26:24 +02:00
parent 96fe5e8229
commit 67772984c5

View file

@ -2,6 +2,7 @@
var http = require('http'); var http = require('http');
var cheerio = require('cheerio'); var cheerio = require('cheerio');
var iconv = require('iconv-lite'); var iconv = require('iconv-lite');
var mongodb = require('mongodb').MongoClient;
//Define schooltypes that need to be ripped. //Define schooltypes that need to be ripped.
var scheduletypes = [ var scheduletypes = [
@ -13,8 +14,11 @@ var scheduletypes = [
var schoolid; var schoolid;
//Function for getting pages with http requests. //Function for getting pages with http requests.
function get(callback) { function get(database) {
var index = [];
var collection = database.collection('index');
collection.drop();
//Go past all of the scheduletypes and download their pages. //Go past all of the scheduletypes and download their pages.
for (scheduletype of scheduletypes) { for (scheduletype of scheduletypes) {
(function (scheduletype) { (function (scheduletype) {
@ -34,6 +38,8 @@ function get(callback) {
if (scheduletype == 'leerling') { if (scheduletype == 'leerling') {
for(studentcategory of list) { for(studentcategory of list) {
(function (studentcategory) {
http.get('http://' + res.req.socket._host + res.req.path + '&afdeling=' + studentcategory, function (res) { http.get('http://' + res.req.socket._host + res.req.path + '&afdeling=' + studentcategory, function (res) {
var _download = ''; var _download = '';
@ -42,28 +48,34 @@ function get(callback) {
}); });
res.on('end', function () { res.on('end', function () {
var list = cheerio('select', _download).children(); var list_students = cheerio('select', _download).children();
for (student in list) { for (student in list_students) {
if (!isNaN(student)) { if (!isNaN(student)) {
var name = cheerio(list[student]).text().split(' - ')[1]; var name = cheerio(list_students[student]).text().split(' - ')[1];
var id = cheerio(list[student]).val(); var id = cheerio(list_students[student]).val();
var database_entry = { var database_entry = {
'id' : id, 'id' : id,
'username' : id + name.split(' ')[0].toLowerCase(),
'full_name' : name, 'full_name' : name,
'first_name' : name.split(' ')[0], 'first_name' : name.split(' ')[0],
'last_name' : name.split(' ').splice(1).join(' '), 'last_name' : name.split(' ').splice(1).join(' '),
'studentcategory' : studentcategory,
'type' : scheduletype 'type' : scheduletype
} }
index.push(database_entry);
collection.insert(database_entry, showOutput);
if (studentcategory == list[list.length - 1] && student == list_students.length - 1) {
database.close();
}
} }
} }
}); });
}); });
})(studentcategory);
} }
callback(index);
} }
else { else {
for (entry of list) { for (entry of list) {
@ -71,7 +83,7 @@ function get(callback) {
'name' : entry, 'name' : entry,
'type' : scheduletype 'type' : scheduletype
} }
index.push(database_entry); collection.insert(database_entry, showOutput);
} }
} }
@ -91,10 +103,17 @@ function extract(page) {
//Function being called to access functionality from this module. //Function being called to access functionality from this module.
function crawl(sid) { function crawl(sid) {
schoolid = sid; schoolid = sid;
var times = 0; mongodb.connect('mongodb://wallpiece/roosterio', function (error, database) {
get(function (data) { if (error) console.warn(error);
console.log(data); get(database);
}); })
}
function showOutput(error, message) {
if (process.argv[2] == '-v') {
if (error) process.stdout.write(error.toString());
process.stdout.write(message + '\n');
}
} }
crawl(934); crawl(934);