From 68c2800aec2730854a9ecdc34b5719edc5f0eca4 Mon Sep 17 00:00:00 2001 From: Filip Znachor Date: Sat, 23 Apr 2022 14:49:12 +0200 Subject: [PATCH] Improved downloader module --- downloader.ts | 131 ++++++++++++++++++++++++++++---------------------- links.ts | 8 +-- urlpool.ts | 42 +++++++++++----- webserver.ts | 48 +++++++----------- 4 files changed, 128 insertions(+), 101 deletions(-) diff --git a/downloader.ts b/downloader.ts index b85f37e..e1834a5 100644 --- a/downloader.ts +++ b/downloader.ts @@ -4,91 +4,108 @@ import { UrlPool } from "./urlpool"; export class Downloader { pool: UrlPool; - chunks: Buffer[] = []; - downloaded_chunks = 0; - total_chunks = 0; + chunks: {[U: string]: (Buffer | null)} = {}; - chunk_size = 512*1024; - total_size = 0; + chunk_size = Math.round(0.1*1024*1024); - ready: boolean = false; + loading: boolean = false; + destroyed: boolean = false; - constructor(pool: UrlPool) { + from: number; + + constructor(pool: UrlPool, from: number) { this.pool = pool; + this.from = from; } - async init() { - let url = await this.pool.get(); - if(!url) throw "No available URL in pool!"; - let r = await axios.get(url[1], { - responseType: 'arraybuffer', - headers: { - Range: `bytes=0-0` + first_chunk(): Buffer | undefined { + let keys = Object.keys(this.chunks); + let key = keys[0]; + if(!key) return; + let first_chunk = this.chunks[key]; + if(first_chunk instanceof Buffer) { + delete this.chunks[key]; + this.from += this.chunk_size; + return first_chunk!; + } + } + + collect() { + let collected: Buffer[] = []; + let first_chunk = this.first_chunk(); + while(first_chunk) { + collected.push(first_chunk); + first_chunk = this.first_chunk(); + } + if(collected.length) return Buffer.concat(collected); + return null; + } + + cache() { + let chunks: string[] = []; + let existing_chunks = Object.keys(this.chunks); + for(let i=0; i<15; i++) { + chunks.push((this.from+(this.chunk_size*i)).toString()); + } + chunks.forEach(from => { + if(existing_chunks.indexOf(from) == -1) { + this.download_part(parseInt(from), parseInt(from)+(this.chunk_size-1)); + this.chunks[from] = null; } }); - this.total_size = parseInt(r.headers["content-range"].split("/")[1]); - this.pool.return(url[0]); } - async download_range(from: number, to: number): Promise { - return new Promise(async (complete) => { - - if(to > this.total_size) to = this.total_size; - - let chunk_count = Math.max(Math.ceil((to-from)/this.chunk_size), 1); - let chunks: Buffer[] = []; - let completed = 0; - - for(let i=0; i { - chunks[i] = result; - completed++; - if(completed == chunk_count) { - complete(Buffer.concat(chunks)); - } - }); - - } + async more(): Promise { + if(this.loading) return false; + this.loading = true; + if(this.from > this.pool.total_size-1) return null; + this.cache(); + let promise: Promise = new Promise((resolve) => { + let wait_for_result = () => { + let result = this.collect(); + if(result || this.destroyed) { + this.loading = false; + resolve(result); + clearInterval(interval); + if(result) console.log(this.pool.id, "| sending:", Math.round(result.length/1024), "kB"); + } + }; + let interval = setInterval(wait_for_result, 100); + wait_for_result(); }); + return promise; } - async download_chunk(from: number, to: number, part: number): Promise { + async download_part(from: number, to: number) { + + if(to > this.pool.total_size-1) to = this.pool.total_size-1; + if(from > this.pool.total_size-1) return; let url = await this.pool.get(); if(!url) throw "No available URL!"; - let lfrom = from + (part * this.chunk_size) + (part > 0 ? 1 : 0); - let lto = Math.min(from + (part * this.chunk_size) + this.chunk_size, to); - if(lfrom == lto) lfrom--; + if(this.destroyed == true) { + this.pool.return(url[0]); + return; + } let r = await axios.get(url[1], { responseType: 'arraybuffer', headers: { - Range: `bytes=${lfrom}-${lto}` + Range: `bytes=${from}-${to}` } - }); + }); + this.chunks[from.toString()] = r.data; this.pool.return(url[0]); - return r.data; } - async download_part(i: number) { - - let chunk_size = 10_000_000; - let from = i*chunk_size; - let to = (from+chunk_size-1); - if(to > this.total_size) to = this.total_size; - - if(this.chunks[i] || from > this.total_size) return false; - this.total_chunks++; - - this.chunks[i] = await this.download_range(from, to); - this.downloaded_chunks++; - return true; - + destroy() { + this.destroyed = true; + let index = this.pool.downloaders.indexOf(this); + this.pool.downloaders.splice(index, 1); } } \ No newline at end of file diff --git a/links.ts b/links.ts index 9c64f73..c7b4e5e 100644 --- a/links.ts +++ b/links.ts @@ -39,6 +39,7 @@ export class Links { let download_link = "https://uloz.to/download-dialog/free/download?fileSlug="+id; let redirect = await this.inst.get(download_link, {maxRedirects: 0, validateStatus: null}); + if(redirect.status !== 302) throw new Error(`Status code: ${redirect.status}`); this.tor.torNewSession(); if(redirect.headers.location && redirect.headers.location.startsWith("https://download.uloz.to")) return redirect.headers.location; @@ -48,9 +49,9 @@ export class Links { async captcha_link(id: string): Promise { let download_link = "https://uloz.to/download-dialog/free/download?fileSlug="+id; - let captcha_page = await this.inst.get(download_link, {maxRedirects: 0}); + let captcha_page = await this.inst.get(download_link, {maxRedirects: 0, validateStatus: null}); - if(captcha_page.status !== 200) throw new Error("Status code is not 200"); + if(captcha_page.status !== 200) throw new Error(`Status code: ${captcha_page.status}`); let cookies = this.parse_cookie(captcha_page.headers["set-cookie"]); let url = this.regex_parse(//gm, captcha_page.data, 1); @@ -78,7 +79,8 @@ export class Links { "X-Requested-With": "XMLHttpRequest", "User-Agent": "Go-http-client/1.1", "Cookie": cookies - } + }, + validateStatus: null }); this.tor.torNewSession(); return result.data.slowDownloadLink; diff --git a/urlpool.ts b/urlpool.ts index 0c02787..2614ef3 100644 --- a/urlpool.ts +++ b/urlpool.ts @@ -11,23 +11,42 @@ function sleep(ms: number) { export class UrlPool { id: string; - is_direct = false; + urls: string[] = []; used: boolean[] = []; + generating = false; - downloader?: Downloader; - valid = false; + downloaders: Downloader[] = []; + + total_size: number = 0; + is_direct = false; + ready = false; constructor(id: string) { this.id = id; } async init() { + let page = await axios.get("https://uloz.to/file/"+this.id, {validateStatus: null}); if(page.status == 404) return false; // TODO: Add quick download support // let quick_dl_url = links.regex_parse(new RegExp('href="(/quickDownload/[^"]*)"'), page.data, 1); this.is_direct = 'js-free-download-button-direct' == links.regex_parse(new RegExp('data-href="/download-dialog/free/[^"]+" +class=".+(js-free-download-button-direct).+"'), page.data, 1); + + await this.generate(); + let url = await this.get(); + if(!url) throw "No available URL in pool!"; + let r = await axios.get(url[1], { + responseType: 'arraybuffer', + headers: { + Range: `bytes=0-0` + } + }); + this.total_size = parseInt(r.headers["content-range"].split("/")[1]); + this.return(url[0]); + + this.ready = true; return true; } @@ -74,8 +93,8 @@ export class UrlPool { async start_generation() { this.generating = true; - while(this.urls.length < 10) { - this.generate(); + while(this.urls.length < 15 && this.generating) { + console.log(this.id, "| new link:", (await this.generate()) ? true : false); await sleep(2000); } this.generating = false; @@ -85,17 +104,18 @@ export class UrlPool { try { let link = await (this.is_direct ? links.direct_link(this.id) : links.captcha_link(this.id)); if(link) this.add(link); - console.log(link); return link; - } catch { + } catch(e) { + console.log(e); + this.generating = false; return undefined; } } - get_downloader() { - if(this.downloader) return this.downloader; - this.downloader = new Downloader(this) - return this.downloader; + get_downloader(from: number) { + let d = new Downloader(this, from); + this.downloaders.push(d); + return d; } } diff --git a/webserver.ts b/webserver.ts index 381e6db..08750da 100644 --- a/webserver.ts +++ b/webserver.ts @@ -1,5 +1,4 @@ import express from "express"; -import { Downloader } from "./downloader"; import { UrlPoolStorage } from "./urlpool"; import { Readable } from 'stream'; @@ -8,9 +7,6 @@ let storage = new UrlPoolStorage; export class Webserver { - chunk_size = 5*1024*1024; - partial_size = this.chunk_size*10; - constructor() { const app = express(); @@ -24,11 +20,17 @@ export class Webserver { return; } + res.writeHead(200, { + "Content-Type": "application/json" + }); res.write(JSON.stringify({ streams: { total: p.urls.length, available: p.available().length, - generating: p.generating + generating: p.generating, + urls: p.urls, + total_size: p.total_size, + downloaders: p.downloaders.length } })); res.end(); @@ -43,40 +45,33 @@ export class Webserver { res.end(); return; } - let d = p.get_downloader(); let range: Range = {from: 0, to: null}; if(req.headers.range) range = this.parse_range(req.headers.range); if(!range.from) range.from = 0; - if(!d.ready) await d.init(); + let d = p.get_downloader(range.from); - let [from, to] = this.from_to([range.from, range.from+this.chunk_size], d.total_size); - - let contentLength = d.total_size-from; + let contentLength = p.total_size-1-range.from; let headers = { - "Content-Range": `bytes ${from}-${d.total_size}/${d.total_size+1}`, - "Range": `bytes=${from}-${d.total_size}/${d.total_size+1}`, + "Content-Range": `bytes ${range.from}-${p.total_size-1}/${p.total_size-1}`, + "Range": `bytes=${range.from}-${p.total_size-1}/${p.total_size-1}`, "Accept-Ranges": "bytes", "Content-Length": contentLength, "Content-Type": "application/octet-stream", }; res.writeHead(206, headers); + res.on("close", () => { + d.destroy(); + }); - const readable = new Readable() + const readable = new Readable(); readable._read = async () => { - [from, to] = this.from_to([to+1, to+1+this.chunk_size], d.total_size); - if(from == d.total_size) { - readable.push(null); - //res.end(); - } - else { - let stream = await this.download_chunk(d, from, to); - readable.push(stream); - } + let stream = await d.more(); + readable.push(stream); } - let stream = await this.download_chunk(d, from, to); + let stream = await d.more(); readable.push(stream); readable.pipe(res); @@ -88,13 +83,6 @@ export class Webserver { } - async download_chunk(d: Downloader, from: number, to: number) { - console.log("downloading...", from, to); - let stream = await d.download_range(from, to); - console.log("downloaded ", from, to); - return stream; - } - parse_range(input: string): Range { let [from, to]: (number|null)[] = [null, null];