Verified Commit 9e9be8ce authored by insert's avatar insert

Add a bunch of books.

parent 85a289c6
COOKIE=SimpleSAMLSessionID=4122db6ddf0fe90619bdc8dd2bf13936; SimpleSAMLAuthToken=_2a1bbed52df07babc4cdc5ab13eda39486b48b1426; PHPSESSID=75t4a8q94hsedu8cctqsgnuol5; session_end=Tue+Jan+07+2020+18%3A26%3A46+UTC%2B0000
\ No newline at end of file
COOKIE=PHPSESSID=v2o98akmnjot9hvpgud3lpqcq3; session_end=Mon+May+25+2020+15%3A56%3A06+UTC%2B0000
\ No newline at end of file
books/
img/
node_modules/
books/
img/
node_modules/
resources/
\ No newline at end of file
# pearson-succ
> This tool converts your Active Learn books to PDF format.
Pearson doesn't provide a PDF version of books because who needs convenience?
I made this because Active Learn is pretty pointless when on mobile, or better still when you have no internet.
## Usage
Before you can download, you need to specify the book URLs.
In `books.json` add entries such as: (there are existing entries which I have previously downloaded)
```json
{
// guide for getting URLs is below
"bookslug": {
"name": "book name",
"url": "https://pearson.not.epic/path/to/bookpage-{{id}}.jpg", // image url of book page, replace id with {{id}}
"pages": 5, // total number of pages
"resources": { ... } // optional, see below
},
...
}
```
## Finding book URL
Open book viewer and right click any page, and inspect element:
![inspect element](https://owo.insrt.uk/faqdMNRLQPDAF8I2qA2UG.png)
Find and copy the URL that looks like this:
![element](https://owo.insrt.uk/-mTzgWaduuWIoIS_QaWXw.png)
Open the page, right-click the image, and press view image.
You should get a URL such as:
```text
https://resources.pearsonactivelearn.com/r00/r0000/r000000/r00000000/current/OPS/images/BOOK_TITLE-001.jpg
```
Replace `001` or whatever three-digit number with `{{id}}`.
In case you are lost, the page URL looks like:
```text
https://resources.pearsonactivelearn.com/r00/r0000/r000000/r00000000/current/OPS/BOOK_TITLE-001.xhtml
```
And you can change the URL above in two steps to get the image anyhow.
- Add `images/` after `/OPS/`
- Change `.xhtml` to `.jpg`
## Finding resource (i.e. solution bank) range
Open the book viewer, go to any exercise and open the resource.
![open resource](https://owo.insrt.uk/ApK05kfymC5GZg5EWEnwu.png)
You should get a URL such as:
```text
https://www.activeteachonline.com/default/player/document/id/000000/external/0/uid/000000
```
The id range can be found by decrementing the number in the first `/id/000000` to find where the book's resources start, and then incrementing it to find where they end.
These two values are the lower and upper bound; use them in the next step.
Also, make a note of the `/uid/000000` in the url.
Then add a `resources` object to the book's entry in the JSON file specifying the range.
```json
{
"lower": 662039, // the lower bound
"upper": 662153, // upper bound
"uid": 357726 // your user id
}
```
To download resources, you also need the Cookie header.
Create a new file called `.env` and ensure cookie is set:
```env
COOKIE=SimpleSAMLSessionID=...; SimpleSAMLAuthToken=...; PHPSESSID=...; session_end=...
```
# pearson-succ
> This tool converts your Active Learn books to PDF format.
Pearson doesn't provide a PDF version of books because who needs convenience?
I made this because Active Learn is pretty pointless when on mobile, or better still when you have no internet.
## Usage
Before you can download, you need to specify the book URLs.
In `books.json` add entries such as: (there are existing entries which I have previously downloaded)
```json
{
// guide for getting URLs is below
"bookslug": {
"name": "book name",
"url": "https://pearson.not.epic/path/to/bookpage-{{id}}.jpg", // image url of book page, replace id with {{id}}
"pages": 5, // total number of pages
"resources": { ... } // optional, see below
},
...
}
```
## Finding book URL
Open book viewer and right click any page, and inspect element:
![inspect element](https://owo.insrt.uk/faqdMNRLQPDAF8I2qA2UG.png)
Find and copy the URL that looks like this:
![element](https://owo.insrt.uk/-mTzgWaduuWIoIS_QaWXw.png)
Open the page, right-click the image, and press view image.
You should get a URL such as:
```text
https://resources.pearsonactivelearn.com/r00/r0000/r000000/r00000000/current/OPS/images/BOOK_TITLE-001.jpg
```
Replace `001` or whatever three-digit number with `{{id}}`.
In case you are lost, the page URL looks like:
```text
https://resources.pearsonactivelearn.com/r00/r0000/r000000/r00000000/current/OPS/BOOK_TITLE-001.xhtml
```
And you can change the URL above in two steps to get the image anyhow.
- Add `images/` after `/OPS/`
- Change `.xhtml` to `.jpg`
## Finding resource (i.e. solution bank) range
Open the book viewer, go to any exercise and open the resource.
![open resource](https://owo.insrt.uk/ApK05kfymC5GZg5EWEnwu.png)
You should get a URL such as:
```text
https://www.activeteachonline.com/default/player/document/id/000000/external/0/uid/000000
```
The id range can be found by decrementing the number in the first `/id/000000` to find where the book's resources start, and then incrementing it to find where they end.
These two values are the lower and upper bound; use them in the next step.
Also, make a note of the `/uid/000000` in the url.
Hence, add an array to the JSON file specifying the ranges you want to download.
```json
[
{
"lower": 662039, // the lower bound
"upper": 662153, // upper bound
"uid": 357726 // your user id
},
..
]
```
To download resources, you also need the Cookie header.
Create a new file called `.env` and ensure cookie is set:
```env
COOKIE=SimpleSAMLSessionID=...; SimpleSAMLAuthToken=...; PHPSESSID=...; session_end=...
```
> Note: if you start getting "Max redirects exceeded." errors, you need to update your cookie.
This diff is collapsed.
[695250,695251,695256,695257,695258,695259,695261,695264,695266,695268,695270,695273,695274,695277,695278,695282,695283,695289,695290,695291,695297,695298,695301,695302,695304,695307,695311,695314,695315,695317,695318,695321,695326,695327,695330,695333,695335,695336,695338,695340,695342,695343,695344,695348,695350,695354,695357,695360,695364,695366,695367,695368,695370,695374,695375,695379,695380,695385,695387,695389,695391,695394,695396,695397,695398,695399,695400,695402,828934,828935,828936,828937,828938,828939,828940,828941,828942,828943,828944,828945,828946,828947,828948,828949,828950,828951,828952,828953,828954,828955,828956,828957,828958,828959,828960,828961,828962,828963,828965,828966,828967,828968,828969,828970,828971,828972,828973,804831,804832,804833,804834,804835,804836,804837,804838,804839,804840,804841,804842,804843,804844,804845,804846,804847,804848,804849,804850,804851,804852,804853,804854,804855,804856,804857,804858,804859,804860,804861,804862,804863,804864,804865,846803,846804,846805,846806,846807,846808,846809,846810,846811,846812,846813,846814,846815,846816,846817,846818,846819,846820,846821,846822,846823,846824,846825,846826,846827,846828,846829,846830,846831,846832,846833,846834,846835,846836,846837]
\ No newline at end of file
const chalk = require('chalk');
const path = require('path');
const fs = require('fs');
const request = require('request');
const PDFDocument = require('pdfkit');
const sizeOf = require('image-size');
const HTMLParser = require('fast-html-parser');
// Make sure the folders for finished PDFs and for downloaded page images exist.
['./books', './img'].forEach((dir) => {
  if (!fs.existsSync(dir)) fs.mkdirSync(dir);
});

// Book definitions, keyed by slug.
const books = require('./books.json');

// Books that still need a PDF. Each queued item is the books.json record
// itself, extended in place with its slug and its output/image paths.
let queue = [];
Object.keys(books).forEach((slug) => {
  const output = path.resolve('./books', slug + '.pdf');
  // A book whose PDF already exists is not queued again.
  if (fs.existsSync(output)) return;
  const images = path.resolve('./img', slug);
  queue.push(Object.assign(books[slug], { id: slug, output, images }));
});

// Book currently being processed (set by next()).
let current;
// Incremented by next() each time it runs.
let i = -1;
// Page counter for the current book; pull() increments it before each page.
let j = 0;
/**
 * Formats a page number the way the image file names expect it:
 * as a string left-padded with zeros to at least three characters.
 *
 * @param {number|string} id - Page number.
 * @returns {string} The zero-padded id, e.g. 7 -> "007"; longer ids are returned unchanged.
 */
function getID(id) {
  const text = '' + id;
  return text.padStart(3, '0');
}
/**
 * Downloads the next missing page image of the current book and chains to
 * itself once the file is written. When every page is on disk it hands over
 * to pdf().
 *
 * Reads and advances the module-level page counter `j` and reads `current`.
 * If a download or the file write fails, the partial image is removed (so a
 * later run fetches it again instead of treating it as done), the error is
 * logged and the chain stops.
 */
function pull() {
  let id;
  let target;
  // Skip pages that are already on disk in a loop rather than by recursion,
  // so a long run of cached pages cannot grow the call stack.
  do {
    j++;
    if (j > current.pages) return pdf();
    id = getID(j);
    target = path.resolve(current.images, id + '.jpg');
  } while (fs.existsSync(target));

  console.log(chalk`{gray Page {green ${j}} / {green ${current.pages}}. [${id}]}`);

  const out = fs.createWriteStream(target);
  let failed = false;
  const fail = (err) => {
    if (failed) return;
    failed = true;
    console.log(chalk`Failed page {green ${id}}: {red ${err}}`);
    out.destroy();
    // Best-effort cleanup: the file may not exist yet, so the unlink result is ignored.
    fs.unlink(target, () => {});
    process.exitCode = 1;
  };

  // pipe() does not forward errors, so both ends need their own listener.
  request(current.url.replace(/{{id}}/g, id))
    .on('error', fail)
    .pipe(out)
    .on('error', fail)
    .on('finish', pull);
}
/**
 * Assembles the downloaded page images of the current book into one PDF
 * written to `current.output`, then moves on to the next queued book.
 *
 * The page size is taken from the first image (001.jpg) and reused for
 * every page.
 */
function pdf() {
  console.log(chalk`{bold Generating {red pdf} for {blue ${current.name}}.}`);

  const { width, height } = sizeOf(path.resolve(current.images, '001.jpg'));
  const doc = new PDFDocument({ size: [width, height] });
  doc.pipe(fs.createWriteStream(current.output));

  // One shared placement-options object, one slot per page.
  const slots = new Array(current.pages).fill({ fit: [width, height], align: 'center', valign: 'center' });
  for (const [index, placement] of slots.entries()) {
    // The document starts with a first page, so only later images need a new one.
    if (index != 0) doc.addPage();
    const file = path.resolve(current.images, getID(index + 1) + '.jpg');
    doc.image(file, 0, 0, placement);
  }

  doc.end();
  next();
}
// Run dotenv's config() before any resource download; dlResource reads
// process.env.COOKIE, which the README says to define in a local `.env` file.
const { config } = require('dotenv');
config();
let axios = require('axios');
/**
 * Downloads one resource document into resources/<bid>/<id>.pdf.
 *
 * Fetches the player page for the given id, looks for an <object> element
 * and downloads the file its `data` attribute points to. Nothing is done if
 * the PDF already exists or the page has no <object> element.
 *
 * Failures (HTTP error, stream error, write error) are logged and swallowed
 * so one bad resource does not stop the rest; a partially written PDF is
 * removed so it is retried on the next run.
 *
 * @param {number|string} id - Resource document id.
 * @param {number|string} uid - User id placed in the player URL.
 * @param {string} bid - Book slug, used as the sub-folder name.
 * @returns {Promise<void>} Always resolves once the resource is handled.
 */
async function dlResource(id, uid, bid) {
  const dest = path.resolve('resources', bid);
  const target = path.resolve(dest, id + '.pdf');
  if (fs.existsSync(target)) return;

  try {
    const res = await axios.get(`https://www.activeteachonline.com/default/player/document/id/${id}/external/0/uid/${uid}`,
      {
        headers: {
          Cookie: process.env.COOKIE
        },
        maxRedirects: 1
      });

    const object = HTMLParser.parse(res.data).querySelector('object');
    if (object === null) return;
    const fn = object.attributes.data;

    fs.existsSync('resources') || fs.mkdirSync('resources');
    fs.existsSync(dest) || fs.mkdirSync(dest);

    await new Promise((resolve, reject) => {
      const out = fs.createWriteStream(target);
      const fail = (err) => {
        out.destroy();
        // Remove the partial file (ignoring unlink errors), then report the original error.
        fs.unlink(target, () => reject(err));
      };
      // pipe() does not forward errors, so both ends need their own listener.
      request(fn)
        .on('error', fail)
        .pipe(out)
        .on('error', fail)
        .on('finish', resolve);
    });
  } catch (e) {
    console.log(chalk`Failed dl: {red ${e}}`);
  }
}
/**
 * Downloads the resources of every book in `books` that defines a
 * `resources` range ({ lower, upper, uid }), one id at a time.
 *
 * @returns {Promise<void>} Resolves when every id in every range was attempted.
 */
async function resourceDownloader() {
  for (const book in books) {
    const b = books[book];
    if (!b.resources) continue;

    console.log(chalk`{bold Downloading {gray resources} for {blue ${b.name}}}`);

    const { lower, upper, uid } = b.resources;
    // Both bounds are inclusive, so the range holds upper - lower + 1 ids.
    const total = upper - lower + 1;
    for (let id = lower; id <= upper; id++) {
      console.log(chalk`{gray Resource {green ${id - lower + 1}} / {green ${total}}.}`);
      await dlResource(id, uid, book);
    }
  }
}
/**
 * Takes the next book off the queue and starts downloading its pages.
 * When the queue is empty, starts the resource downloader instead.
 */
function next() {
  i++;
  current = queue.shift();

  if (!current) {
    console.log(chalk.green('Finished downloading!'));
    // Do not leave the promise floating: report an unexpected failure
    // instead of ending in an unhandled rejection.
    resourceDownloader().catch((err) => {
      console.log(chalk`Failed dl: {red ${err}}`);
      process.exitCode = 1;
    });
    return;
  }

  console.log(chalk`{bold Downloading {blue ${current.name}}.}`);
  fs.existsSync(current.images) || fs.mkdirSync(current.images);
  // Restart the page counter for this book.
  j = 0;
  pull();
}
const chalk = require('chalk');
const path = require('path');
const fs = require('fs');
const request = require('request');
const PDFDocument = require('pdfkit');
const sizeOf = require('image-size');
const HTMLParser = require('fast-html-parser');
// Make sure the folders for finished PDFs and for downloaded page images exist.
['./books', './img'].forEach((dir) => {
  if (!fs.existsSync(dir)) fs.mkdirSync(dir);
});

// Book definitions, keyed by slug.
const books = require('./books.json');

// Books that still need a PDF. Each queued item is the books.json record
// itself, extended in place with its slug and its output/image paths.
let queue = [];
Object.keys(books).forEach((slug) => {
  const output = path.resolve('./books', slug + '.pdf');
  // A book whose PDF already exists is not queued again.
  if (fs.existsSync(output)) return;
  const images = path.resolve('./img', slug);
  queue.push(Object.assign(books[slug], { id: slug, output, images }));
});

// Book currently being processed (set by next()).
let current;
// Incremented by next() each time it runs.
let i = -1;
// Page counter for the current book; pull() increments it before each page.
let j = 0;
/**
 * Formats a page number the way the image file names expect it:
 * as a string left-padded with zeros to at least three characters.
 *
 * @param {number|string} id - Page number.
 * @returns {string} The zero-padded id, e.g. 7 -> "007"; longer ids are returned unchanged.
 */
function getID(id) {
  const text = '' + id;
  return text.padStart(3, '0');
}
/**
 * Downloads the next missing page image of the current book and chains to
 * itself once the file is written. When every page is on disk it hands over
 * to pdf().
 *
 * Reads and advances the module-level page counter `j` and reads `current`.
 * If a download or the file write fails, the partial image is removed (so a
 * later run fetches it again instead of treating it as done), the error is
 * logged and the chain stops.
 */
function pull() {
  let id;
  let target;
  // Skip pages that are already on disk in a loop rather than by recursion,
  // so a long run of cached pages cannot grow the call stack.
  do {
    j++;
    if (j > current.pages) return pdf();
    id = getID(j);
    target = path.resolve(current.images, id + '.jpg');
  } while (fs.existsSync(target));

  console.log(chalk`{gray Page {green ${j}} / {green ${current.pages}}. [${id}]}`);

  const out = fs.createWriteStream(target);
  let failed = false;
  const fail = (err) => {
    if (failed) return;
    failed = true;
    console.log(chalk`Failed page {green ${id}}: {red ${err}}`);
    out.destroy();
    // Best-effort cleanup: the file may not exist yet, so the unlink result is ignored.
    fs.unlink(target, () => {});
    process.exitCode = 1;
  };

  // pipe() does not forward errors, so both ends need their own listener.
  request(current.url.replace(/{{id}}/g, id))
    .on('error', fail)
    .pipe(out)
    .on('error', fail)
    .on('finish', pull);
}
/**
 * Assembles the downloaded page images of the current book into one PDF
 * written to `current.output`, then moves on to the next queued book.
 *
 * The page size is taken from the first image (001.jpg) and reused for
 * every page.
 */
function pdf() {
  console.log(chalk`{bold Generating {red pdf} for {blue ${current.name}}.}`);

  const { width, height } = sizeOf(path.resolve(current.images, '001.jpg'));
  const doc = new PDFDocument({ size: [width, height] });
  doc.pipe(fs.createWriteStream(current.output));

  // One shared placement-options object, one slot per page.
  const slots = new Array(current.pages).fill({ fit: [width, height], align: 'center', valign: 'center' });
  for (const [index, placement] of slots.entries()) {
    // The document starts with a first page, so only later images need a new one.
    if (index != 0) doc.addPage();
    const file = path.resolve(current.images, getID(index + 1) + '.jpg');
    doc.image(file, 0, 0, placement);
  }

  doc.end();
  next();
}
// Run dotenv's config() before any resource download; dlResource reads
// process.env.COOKIE, which the README says to define in a local `.env` file.
const { config } = require('dotenv');
config();
/**
 * Persistent list of resource ids to skip. The list is loaded from
 * ignored_ids.json and written back to that file every time an id is added.
 */
const resource_ignorer = {
  // Ids loaded from disk; add() appends to this same array.
  ignored: require('./ignored_ids.json'),

  /** Tells whether the given id is on the ignore list. */
  is(id) {
    return resource_ignorer.ignored.includes(id);
  },

  /** Puts the id on the ignore list and saves the whole list to disk. */
  add(id) {
    resource_ignorer.ignored.push(id);
    const serialized = JSON.stringify(resource_ignorer.ignored);
    fs.writeFileSync('ignored_ids.json', serialized);
  },
};
let axios = require('axios');
/**
 * Downloads one resource document into resources/<bid>/<id>.pdf.
 *
 * Fetches the player page for the given id, looks for an <object> element
 * and downloads the file its `data` attribute points to. If the PDF already
 * exists nothing is done. If the page has no <object> element the id is
 * added to the ignore list so it is skipped on later runs.
 *
 * Failures (HTTP error, stream error, write error) are logged and swallowed
 * so one bad resource does not stop the rest; a partially written PDF is
 * removed so it is retried on the next run.
 *
 * @param {number|string} id - Resource document id.
 * @param {number|string} uid - User id placed in the player URL.
 * @param {string} bid - Book slug, used as the sub-folder name.
 * @param {Function} [didDoingDownload] - Called once, just before the network
 *   request, only when the resource is not already on disk.
 * @returns {Promise<void>} Resolves once the resource is handled.
 */
async function dlResource(id, uid, bid, didDoingDownload = () => {}) {
  const dest = path.resolve('resources', bid);
  const target = path.resolve(dest, id + '.pdf');
  if (fs.existsSync(target)) return;

  didDoingDownload();

  try {
    // The README notes that "Max redirects exceeded." errors mean the cookie needs updating.
    const res = await axios.get(`https://www.activeteachonline.com/default/player/document/id/${id}/external/0/uid/${uid}`,
      {
        headers: {
          Cookie: process.env.COOKIE
        },
        maxRedirects: 1
      });

    const object = HTMLParser.parse(res.data).querySelector('object');
    if (object === null) {
      resource_ignorer.add(id);
      return;
    }
    const fn = object.attributes.data;

    fs.existsSync('resources') || fs.mkdirSync('resources');
    fs.existsSync(dest) || fs.mkdirSync(dest);

    await new Promise((resolve, reject) => {
      const out = fs.createWriteStream(target);
      const fail = (err) => {
        out.destroy();
        // Remove the partial file (ignoring unlink errors), then report the original error.
        fs.unlink(target, () => reject(err));
      };
      // pipe() does not forward errors, so both ends need their own listener.
      request(fn)
        .on('error', fail)
        .pipe(out)
        .on('error', fail)
        .on('finish', resolve);
    });
  } catch (e) {
    console.log(chalk`Failed dl: {red ${e}}`);
  }
}
/**
 * Downloads the resources of every book in `books` that defines
 * `resources`, one id at a time, skipping ids on the ignore list.
 *
 * `resources` is normally an array of { lower, upper, uid } ranges; a single
 * range object (the earlier format) is accepted as well.
 *
 * @returns {Promise<void>} Resolves when every id in every range was attempted.
 */
async function resourceDownloader() {
  for (const book in books) {
    const b = books[book];
    if (!b.resources) continue;

    console.log(chalk`{bold Downloading {gray resources} for {blue ${b.name}}}`);

    // Wrap a lone range object so both formats go through the same loop.
    const ranges = Array.isArray(b.resources) ? b.resources : [b.resources];
    for (const range of ranges) {
      // Both bounds are inclusive, so the range holds upper - lower + 1 ids.
      const total = range.upper - range.lower + 1;
      for (let id = range.lower; id <= range.upper; id++) {
        if (resource_ignorer.is(id)) continue;
        await dlResource(id, range.uid, book, () => {
          console.log(chalk`{gray Resource {green ${id - range.lower + 1}} / {green ${total}}. (${id})}`);
        });
      }
    }
  }
}
/**
 * Takes the next book off the queue and starts downloading its pages.
 * When the queue is empty, starts the resource downloader instead.
 */
function next() {
  i++;
  current = queue.shift();

  if (!current) {
    console.log(chalk.green('Finished downloading!'));
    // Do not leave the promise floating: report an unexpected failure
    // instead of ending in an unhandled rejection.
    resourceDownloader().catch((err) => {
      console.log(chalk`Failed dl: {red ${err}}`);
      process.exitCode = 1;
    });
    return;
  }

  console.log(chalk`{bold Downloading {blue ${current.name}}.}`);
  fs.existsSync(current.images) || fs.mkdirSync(current.images);
  // Restart the page counter for this book.
  j = 0;
  pull();
}
// Entry point: start with the first queued book (or go straight to resources if the queue is empty).
next();
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment