Improved downloader module

This commit is contained in:
Filip Znachor 2022-04-23 14:49:12 +02:00
parent 84d4038c6d
commit 68c2800aec
4 changed files with 128 additions and 101 deletions

View file

@ -4,91 +4,108 @@ import { UrlPool } from "./urlpool";
export class Downloader { export class Downloader {
pool: UrlPool; pool: UrlPool;
chunks: Buffer[] = []; chunks: {[U: string]: (Buffer | null)} = {};
downloaded_chunks = 0;
total_chunks = 0;
chunk_size = 512*1024; chunk_size = Math.round(0.1*1024*1024);
total_size = 0;
ready: boolean = false; loading: boolean = false;
destroyed: boolean = false;
constructor(pool: UrlPool) { from: number;
constructor(pool: UrlPool, from: number) {
this.pool = pool; this.pool = pool;
this.from = from;
} }
async init() { first_chunk(): Buffer | undefined {
let url = await this.pool.get(); let keys = Object.keys(this.chunks);
if(!url) throw "No available URL in pool!"; let key = keys[0];
let r = await axios.get(url[1], { if(!key) return;
responseType: 'arraybuffer', let first_chunk = this.chunks[key];
headers: { if(first_chunk instanceof Buffer) {
Range: `bytes=0-0` delete this.chunks[key];
this.from += this.chunk_size;
return first_chunk!;
}
}
collect() {
let collected: Buffer[] = [];
let first_chunk = this.first_chunk();
while(first_chunk) {
collected.push(first_chunk);
first_chunk = this.first_chunk();
}
if(collected.length) return Buffer.concat(collected);
return null;
}
cache() {
let chunks: string[] = [];
let existing_chunks = Object.keys(this.chunks);
for(let i=0; i<15; i++) {
chunks.push((this.from+(this.chunk_size*i)).toString());
}
chunks.forEach(from => {
if(existing_chunks.indexOf(from) == -1) {
this.download_part(parseInt(from), parseInt(from)+(this.chunk_size-1));
this.chunks[from] = null;
} }
}); });
this.total_size = parseInt(r.headers["content-range"].split("/")[1]);
this.pool.return(url[0]);
} }
async download_range(from: number, to: number): Promise<Buffer> { async more(): Promise<Buffer | null | false> {
return new Promise(async (complete) => { if(this.loading) return false;
this.loading = true;
if(to > this.total_size) to = this.total_size; if(this.from > this.pool.total_size-1) return null;
let chunk_count = Math.max(Math.ceil((to-from)/this.chunk_size), 1);
let chunks: Buffer[] = [];
let completed = 0;
for(let i=0; i<chunk_count; i++) {
this.download_chunk(from, to, i).then((result) => {
chunks[i] = result;
completed++;
if(completed == chunk_count) {
complete(Buffer.concat(chunks));
}
});
}
this.cache();
let promise: Promise<Buffer | null> = new Promise((resolve) => {
let wait_for_result = () => {
let result = this.collect();
if(result || this.destroyed) {
this.loading = false;
resolve(result);
clearInterval(interval);
if(result) console.log(this.pool.id, "| sending:", Math.round(result.length/1024), "kB");
}
};
let interval = setInterval(wait_for_result, 100);
wait_for_result();
}); });
return promise;
} }
async download_chunk(from: number, to: number, part: number): Promise<Buffer> { async download_part(from: number, to: number) {
if(to > this.pool.total_size-1) to = this.pool.total_size-1;
if(from > this.pool.total_size-1) return;
let url = await this.pool.get(); let url = await this.pool.get();
if(!url) throw "No available URL!"; if(!url) throw "No available URL!";
let lfrom = from + (part * this.chunk_size) + (part > 0 ? 1 : 0); if(this.destroyed == true) {
let lto = Math.min(from + (part * this.chunk_size) + this.chunk_size, to); this.pool.return(url[0]);
if(lfrom == lto) lfrom--; return;
}
let r = await axios.get(url[1], { let r = await axios.get(url[1], {
responseType: 'arraybuffer', responseType: 'arraybuffer',
headers: { headers: {
Range: `bytes=${lfrom}-${lto}` Range: `bytes=${from}-${to}`
} }
}); });
this.chunks[from.toString()] = r.data;
this.pool.return(url[0]); this.pool.return(url[0]);
return r.data;
} }
async download_part(i: number) { destroy() {
this.destroyed = true;
let chunk_size = 10_000_000; let index = this.pool.downloaders.indexOf(this);
let from = i*chunk_size; this.pool.downloaders.splice(index, 1);
let to = (from+chunk_size-1);
if(to > this.total_size) to = this.total_size;
if(this.chunks[i] || from > this.total_size) return false;
this.total_chunks++;
this.chunks[i] = await this.download_range(from, to);
this.downloaded_chunks++;
return true;
} }
} }

View file

@ -39,6 +39,7 @@ export class Links {
let download_link = "https://uloz.to/download-dialog/free/download?fileSlug="+id; let download_link = "https://uloz.to/download-dialog/free/download?fileSlug="+id;
let redirect = await this.inst.get(download_link, {maxRedirects: 0, validateStatus: null}); let redirect = await this.inst.get(download_link, {maxRedirects: 0, validateStatus: null});
if(redirect.status !== 302) throw new Error(`Status code: ${redirect.status}`);
this.tor.torNewSession(); this.tor.torNewSession();
if(redirect.headers.location && redirect.headers.location.startsWith("https://download.uloz.to")) if(redirect.headers.location && redirect.headers.location.startsWith("https://download.uloz.to"))
return redirect.headers.location; return redirect.headers.location;
@ -48,9 +49,9 @@ export class Links {
async captcha_link(id: string): Promise<string | undefined> { async captcha_link(id: string): Promise<string | undefined> {
let download_link = "https://uloz.to/download-dialog/free/download?fileSlug="+id; let download_link = "https://uloz.to/download-dialog/free/download?fileSlug="+id;
let captcha_page = await this.inst.get(download_link, {maxRedirects: 0}); let captcha_page = await this.inst.get(download_link, {maxRedirects: 0, validateStatus: null});
if(captcha_page.status !== 200) throw new Error("Status code is not 200"); if(captcha_page.status !== 200) throw new Error(`Status code: ${captcha_page.status}`);
let cookies = this.parse_cookie(captcha_page.headers["set-cookie"]); let cookies = this.parse_cookie(captcha_page.headers["set-cookie"]);
let url = this.regex_parse(/<img class="xapca-image" src="([^"]*)" alt="">/gm, captcha_page.data, 1); let url = this.regex_parse(/<img class="xapca-image" src="([^"]*)" alt="">/gm, captcha_page.data, 1);
@ -78,7 +79,8 @@ export class Links {
"X-Requested-With": "XMLHttpRequest", "X-Requested-With": "XMLHttpRequest",
"User-Agent": "Go-http-client/1.1", "User-Agent": "Go-http-client/1.1",
"Cookie": cookies "Cookie": cookies
} },
validateStatus: null
}); });
this.tor.torNewSession(); this.tor.torNewSession();
return result.data.slowDownloadLink; return result.data.slowDownloadLink;

View file

@ -11,23 +11,42 @@ function sleep(ms: number) {
export class UrlPool { export class UrlPool {
id: string; id: string;
is_direct = false;
urls: string[] = []; urls: string[] = [];
used: boolean[] = []; used: boolean[] = [];
generating = false; generating = false;
downloader?: Downloader; downloaders: Downloader[] = [];
valid = false;
total_size: number = 0;
is_direct = false;
ready = false;
constructor(id: string) { constructor(id: string) {
this.id = id; this.id = id;
} }
async init() { async init() {
let page = await axios.get("https://uloz.to/file/"+this.id, {validateStatus: null}); let page = await axios.get("https://uloz.to/file/"+this.id, {validateStatus: null});
if(page.status == 404) return false; if(page.status == 404) return false;
// TODO: Add quick download support // TODO: Add quick download support
// let quick_dl_url = links.regex_parse(new RegExp('href="(/quickDownload/[^"]*)"'), page.data, 1); // let quick_dl_url = links.regex_parse(new RegExp('href="(/quickDownload/[^"]*)"'), page.data, 1);
this.is_direct = 'js-free-download-button-direct' == links.regex_parse(new RegExp('data-href="/download-dialog/free/[^"]+" +class=".+(js-free-download-button-direct).+"'), page.data, 1); this.is_direct = 'js-free-download-button-direct' == links.regex_parse(new RegExp('data-href="/download-dialog/free/[^"]+" +class=".+(js-free-download-button-direct).+"'), page.data, 1);
await this.generate();
let url = await this.get();
if(!url) throw "No available URL in pool!";
let r = await axios.get(url[1], {
responseType: 'arraybuffer',
headers: {
Range: `bytes=0-0`
}
});
this.total_size = parseInt(r.headers["content-range"].split("/")[1]);
this.return(url[0]);
this.ready = true;
return true; return true;
} }
@ -74,8 +93,8 @@ export class UrlPool {
async start_generation() { async start_generation() {
this.generating = true; this.generating = true;
while(this.urls.length < 10) { while(this.urls.length < 15 && this.generating) {
this.generate(); console.log(this.id, "| new link:", (await this.generate()) ? true : false);
await sleep(2000); await sleep(2000);
} }
this.generating = false; this.generating = false;
@ -85,17 +104,18 @@ export class UrlPool {
try { try {
let link = await (this.is_direct ? links.direct_link(this.id) : links.captcha_link(this.id)); let link = await (this.is_direct ? links.direct_link(this.id) : links.captcha_link(this.id));
if(link) this.add(link); if(link) this.add(link);
console.log(link);
return link; return link;
} catch { } catch(e) {
console.log(e);
this.generating = false;
return undefined; return undefined;
} }
} }
get_downloader() { get_downloader(from: number) {
if(this.downloader) return this.downloader; let d = new Downloader(this, from);
this.downloader = new Downloader(this) this.downloaders.push(d);
return this.downloader; return d;
} }
} }

View file

@ -1,5 +1,4 @@
import express from "express"; import express from "express";
import { Downloader } from "./downloader";
import { UrlPoolStorage } from "./urlpool"; import { UrlPoolStorage } from "./urlpool";
import { Readable } from 'stream'; import { Readable } from 'stream';
@ -8,9 +7,6 @@ let storage = new UrlPoolStorage;
export class Webserver { export class Webserver {
chunk_size = 5*1024*1024;
partial_size = this.chunk_size*10;
constructor() { constructor() {
const app = express(); const app = express();
@ -24,11 +20,17 @@ export class Webserver {
return; return;
} }
res.writeHead(200, {
"Content-Type": "application/json"
});
res.write(JSON.stringify({ res.write(JSON.stringify({
streams: { streams: {
total: p.urls.length, total: p.urls.length,
available: p.available().length, available: p.available().length,
generating: p.generating generating: p.generating,
urls: p.urls,
total_size: p.total_size,
downloaders: p.downloaders.length
} }
})); }));
res.end(); res.end();
@ -43,40 +45,33 @@ export class Webserver {
res.end(); res.end();
return; return;
} }
let d = p.get_downloader();
let range: Range = {from: 0, to: null}; let range: Range = {from: 0, to: null};
if(req.headers.range) range = this.parse_range(req.headers.range); if(req.headers.range) range = this.parse_range(req.headers.range);
if(!range.from) range.from = 0; if(!range.from) range.from = 0;
if(!d.ready) await d.init(); let d = p.get_downloader(range.from);
let [from, to] = this.from_to([range.from, range.from+this.chunk_size], d.total_size); let contentLength = p.total_size-1-range.from;
let contentLength = d.total_size-from;
let headers = { let headers = {
"Content-Range": `bytes ${from}-${d.total_size}/${d.total_size+1}`, "Content-Range": `bytes ${range.from}-${p.total_size-1}/${p.total_size-1}`,
"Range": `bytes=${from}-${d.total_size}/${d.total_size+1}`, "Range": `bytes=${range.from}-${p.total_size-1}/${p.total_size-1}`,
"Accept-Ranges": "bytes", "Accept-Ranges": "bytes",
"Content-Length": contentLength, "Content-Length": contentLength,
"Content-Type": "application/octet-stream", "Content-Type": "application/octet-stream",
}; };
res.writeHead(206, headers); res.writeHead(206, headers);
res.on("close", () => {
d.destroy();
});
const readable = new Readable() const readable = new Readable();
readable._read = async () => { readable._read = async () => {
[from, to] = this.from_to([to+1, to+1+this.chunk_size], d.total_size); let stream = await d.more();
if(from == d.total_size) { readable.push(stream);
readable.push(null);
//res.end();
}
else {
let stream = await this.download_chunk(d, from, to);
readable.push(stream);
}
} }
let stream = await this.download_chunk(d, from, to); let stream = await d.more();
readable.push(stream); readable.push(stream);
readable.pipe(res); readable.pipe(res);
@ -88,13 +83,6 @@ export class Webserver {
} }
async download_chunk(d: Downloader, from: number, to: number) {
console.log("downloading...", from, to);
let stream = await d.download_range(from, to);
console.log("downloaded ", from, to);
return stream;
}
parse_range(input: string): Range { parse_range(input: string): Range {
let [from, to]: (number|null)[] = [null, null]; let [from, to]: (number|null)[] = [null, null];