From 4a51022039bcca0fbed0e0acafa47a66ce20f2e4 Mon Sep 17 00:00:00 2001
From: Bram van der Veen <96aa48@gmail.com>
Date: Wed, 24 Jun 2015 17:07:52 +0200
Subject: [PATCH] Added socks support with configurable port.

Send the outgoing HTTP requests in schedule.js and spider.js through a
SOCKS5 proxy by swapping the core 'http' module for 'socks5-http-client'.
Each request URL is parsed into an options object and the proxy port is
taken from the new 'tor_port' configuration key (9050 in this patch).

---
Reviewer notes (ignored by `git am`):
 * spider.js: the required module name was misspelled
   ('socsk5-http-client'); it now matches schedule.js.
 * schedule.js: get(url, callback) has a parameter named `url`, which
   shadowed the newly required `url` module, so `url.parse(url)` was
   called on the string argument and would throw. The module is now
   bound to `urlModule` in that file.
 * schedule.js still contains a debugging `console.log(options)`;
   consider dropping it in a follow-up.
 * Line breaks and leading whitespace were reconstructed (tabs assumed)
   from a collapsed copy of this patch; verify context whitespace
   against the tree before applying. The post-image hashes on the
   `index` lines predate the two fixes above.

 configuration.js |  1 +
 schedule.js      | 11 +++++++++--
 spider.js        | 12 ++++++++----
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/configuration.js b/configuration.js
index 59f5659..1346071 100644
--- a/configuration.js
+++ b/configuration.js
@@ -9,6 +9,7 @@ module.exports = function () {
 		'env' : 'dev',
 		'database' : 'example.com/database',
 		'web_port' : 1024,
+		'tor_port' : 9050,
 		'amount_of_hours' : 7,
 		'hour_times' : [
 			'8:45 - 9:45',
diff --git a/schedule.js b/schedule.js
index 9628714..1cdcb19 100644
--- a/schedule.js
+++ b/schedule.js
@@ -1,7 +1,8 @@
 //schedule.js
-var http = require('http');
+var http = require('socks5-http-client');
 var cheerio = require('cheerio');
 var config = require('./configuration');
+var urlModule = require('url');
 
 //Wrapper function that is being called by express.
 function schedule(req, res, next) {
@@ -13,7 +14,13 @@ function schedule(req, res, next) {
 
 //Function for getting the page via http.
 function get(url, callback) {
-	http.get(url, function (res) {
+
+	var options = urlModule.parse(url);
+	options.socksPort = config().tor_port;
+
+	console.log(options);
+
+	http.get(options, function (res) {
 		var _download = '';
 
 		res.on('data', function (data) {
diff --git a/spider.js b/spider.js
index 2d0f917..cdca2d8 100644
--- a/spider.js
+++ b/spider.js
@@ -1,8 +1,9 @@
-var http = require('http');
+var http = require('socks5-http-client');
 var cheerio = require('cheerio');
 var iconv = require('iconv-lite');
 var mongodb = require('mongodb').MongoClient;
 var config = require('./configuration');
+var url = require('url');
 
 var scheduletypes = [
 	'Klasrooster',
@@ -21,9 +22,10 @@ function get() {
 
 		(function (scheduletype) {
 
-			var link = 'http://roosters5.gepro-osi.nl/roosters/rooster.php?school=' + school_id + '&type=' + scheduletype;
+			var options = url.parse('http://roosters5.gepro-osi.nl/roosters/rooster.php?school=' + school_id + '&type=' + scheduletype);
+			options.socksPort = config().tor_port;
 
-			http.get(link, function (res) {
+			http.get(options, function (res) {
 				var _download = {};
 				_download.type = scheduletype;
 
@@ -58,8 +60,10 @@ function rip(data) {
 
 		for(studentcategory of list) {
 			(function (studentcategory) {
+				var options = url.parse('http://roosters5.gepro-osi.nl/roosters/rooster.php?school=' + school_id + '&type=' + data.type + '&afdeling=' + studentcategory);
+				options.socksPort = config().tor_port;
 
-				http.get('http://roosters5.gepro-osi.nl/roosters/rooster.php?school=' + school_id + '&type=' + data.type + '&afdeling=' + studentcategory, function (res) {
+				http.get(options, function (res) {
 					var _download = '';
 
 					res.on('data', function (data) {