...
 
Commits (2)
COOKIE=SimpleSAMLSessionID=4122db6ddf0fe90619bdc8dd2bf13936; SimpleSAMLAuthToken=_2a1bbed52df07babc4cdc5ab13eda39486b48b1426; PHPSESSID=75t4a8q94hsedu8cctqsgnuol5; session_end=Tue+Jan+07+2020+18%3A26%3A46+UTC%2B0000
\ No newline at end of file
books/
img/
node_modules/
\ No newline at end of file
node_modules/
resources/
\ No newline at end of file
......@@ -18,7 +18,8 @@ In `books.json` add entries such as: (there are existing entries which I have pr
"bookslug": {
"name": "book name",
"url": "https://pearson.not.epic/path/to/bookpage-{{id}}.jpg", // image url of book page, replace id with {{id}}
"pages": 5 // total number of pages
"pages": 5, // total number of pages
"resources": { ... } // optional, see below
},
...
}
......@@ -54,3 +55,39 @@ And you can change the URL above in two steps to get the image anyhow.
- Add `images/` after `/OPS/`
- Change `.xhtml` to `.jpg`
## Finding resource (i.e. solution bank) range
Open the book viewer, go to any exercise and open the resource.
![open resource](https://owo.insrt.uk/ApK05kfymC5GZg5EWEnwu.png)
You should get a URL such as:
```text
https://www.activeteachonline.com/default/player/document/id/000000/external/0/uid/000000
```
To find the id range, start from the number after `/id/` in this URL: step it downwards to find the lower bound, then upwards to find the upper bound.
You should find its lower and upper bound and use that in the next step.
Also, make a note of the `/uid/000000` in the url.
Then add a `resources` object to the book's entry in `books.json` specifying the range and your uid:
```json
{
"lower": 662039, // the lower bound
"upper": 662153, // upper bound
"uid": 357726 // your user id
}
```
To download resources, you also need the Cookie header.
Create a new file called `.env` and ensure `COOKIE` is set:
```env
COOKIE=SimpleSAMLSessionID=...; SimpleSAMLAuthToken=...; PHPSESSID=...; session_end=...
```
......@@ -37,12 +37,22 @@
"puremathyr1": {
"name": "AS Pure Mathematics Year 1",
"url": "https://resources.pearsonactivelearn.com/r00/r0066/r006623/r00662374/current/OPS/images/Pure_Maths_1-{{id}}.jpg",
"pages": 408
"pages": 408,
"resources": {
"lower": 662039,
"upper": 662153,
"uid": 357726
}
},
"statsmechmathyr1": {
"name": "AS Statistics and Mechanics Maths Year 1",
"url": "https://resources.pearsonactivelearn.com/r00/r0067/r006740/r00674039/current/OPS/images/stats_and_mech_1_combined-{{id}}.jpg",
"pages": 240
"pages": 240,
"resources": {
"lower": 682216,
"upper": 682277,
"uid": 357726
}
},
"physicsa1": {
"name": "AS OCR Physics A: Second Edition Year 1",
......@@ -52,6 +62,21 @@
"puremathyr2": {
"name": "A Level Pure Mathematics Year 2",
"url": "https://resources.pearsonactivelearn.com/r00/r0068/r006800/r00680025/current/OPS/images/692597-{{id}}.jpg",
"pages": 432
"pages": 432,
"resources": {
"lower": 695246,
"upper": 695405,
"uid": 357726
}
},
"statsmechmathyr2": {
"name": "Statistics and Mechanics Maths Year 2",
"url": "https://resources.pearsonactivelearn.com/r00/r0070/r007037/r00703733/current/OPS/images/New_SM2-{{id}}.jpg",
"pages": 216,
"resources": {
"lower": 711032,
"upper": 711078,
"uid": 357726
}
}
}
\ No newline at end of file
......@@ -4,6 +4,7 @@ const fs = require('fs');
const request = require('request');
const PDFDocument = require('pdfkit');
const sizeOf = require('image-size');
const HTMLParser = require('fast-html-parser');
// Make sure the output directories exist before anything is written into them.
for (const dir of ['./books', './img']) {
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir);
    }
}
......@@ -54,10 +55,63 @@ function pdf() {
next();
}
const { config } = require('dotenv');
config();
let axios = require('axios');
/**
 * Download a single resource PDF to `resources/<bid>/<id>.pdf`.
 *
 * Fetches the player page for the resource (sending the COOKIE value from the
 * environment), reads the `data` attribute of the first `<object>` element on
 * that page and streams the URL it points at to disk.
 *
 * The returned promise never rejects: any failure is logged and the promise
 * resolves, so a caller iterating over a range of ids can keep going.
 *
 * @param {number|string} id  Resource id, used in the URL and as the file name.
 * @param {number|string} uid User id placed in the `/uid/` URL segment.
 * @param {string} bid        Book slug, used as the sub-directory name.
 * @returns {Promise<void>} Resolves when the file is written, skipped or failed.
 */
async function dlResource(id, uid, bid) {
    const dest = path.resolve('resources', bid);
    const target = path.resolve(dest, id + '.pdf');

    // Already on disk from an earlier run: nothing to do.
    if (fs.existsSync(target)) return;

    try {
        const res = await axios.get(`https://www.activeteachonline.com/default/player/document/id/${id}/external/0/uid/${uid}`,
            {
                headers: {
                    Cookie: process.env.COOKIE
                },
                maxRedirects: 1
            });

        const object = HTMLParser.parse(res.data).querySelector('object');
        // Pages without an <object> element have nothing to download.
        if (object === null) return;

        const fn = object.attributes.data;
        fs.existsSync('resources') || fs.mkdirSync('resources');
        fs.existsSync(dest) || fs.mkdirSync(dest);

        await new Promise((resolve, reject) => {
            // Start the request first so an invalid URL throws before any file is created.
            const download = request(fn);
            const out = fs.createWriteStream(target);

            // Without these handlers a failed transfer would never settle the
            // promise. The partial file is removed (best effort) because the
            // existence check above would otherwise treat it as complete.
            const fail = (err) => {
                out.destroy();
                fs.unlink(target, () => reject(err));
            };

            out.on('error', fail).on('finish', resolve);
            download.on('error', fail).pipe(out);
        });
    } catch (e) {
        console.log(chalk`Failed dl: {red ${e}}`);
    }
}
/**
 * Download the resources of every book that declares a `resources` range.
 *
 * For each such book, ids from `lower` to `upper` (both inclusive) are passed
 * to `dlResource` one at a time, with a progress line logged before each.
 *
 * @returns {Promise<void>} Resolves once every id has been attempted.
 */
async function resourceDownloader() {
    for (const [slug, book] of Object.entries(books)) {
        if (!book.resources) continue;

        const { lower, upper, uid } = book.resources;
        // The range is inclusive on both ends, so it holds upper - lower + 1 ids.
        const total = upper - lower + 1;

        console.log(chalk`{bold Downloading {gray resources} for {blue ${book.name}}}`);
        for (let id = lower; id <= upper; id++) {
            console.log(chalk`{gray Resource {green ${id - lower + 1}} / {green ${total}}.}`);
            // Sequential on purpose: one request at a time.
            await dlResource(id, uid, slug);
        }
    }
}
function next() {
i++;
current = queue.shift();
if (!current) return console.log(chalk.green('Finished downloading!'));
if (!current) {
console.log(chalk.green('Finished downloading!'));
resourceDownloader();
return;
}
console.log(chalk`{bold Downloading {blue ${current.name}}.}`);
fs.existsSync(current.images) || fs.mkdirSync(current.images);
j = 0;
......
......@@ -4,7 +4,10 @@
"description": "Download online text books from Pearson Active Learn",
"main": "index.js",
"dependencies": {
"axios": "^0.19.0",
"chalk": "^2.4.2",
"dotenv": "^8.2.0",
"fast-html-parser": "^1.0.1",
"image-size": "^0.7.1",
"pdfkit": "^0.8.3",
"request": "^2.88.0"
......
This diff is collapsed.