#!/bin/sh
#
# reddit-scrape - download the images of a subreddit listing.
#
# Usage: reddit-scrape SUBREDDIT [SORT] [TOP_TIME]
#   SUBREDDIT  subreddit name, inserted after "/r/" in the listing URL
#   SORT       listing name inserted into the URL path (default: "hot")
#   TOP_TIME   value sent as the "t" query parameter (default: empty)
#
# Every post whose "post_hint" contains "image" is saved as
#   ./SUBREDDIT/<title>_<SUBREDDIT>_<post id>.<extension>
# and its title is printed to stdout. Listing pages are followed through the
# "after" cursor until the API stops returning one.
#
# Requires: wget, jq.
# Exit status: 0 on success, 1 on a missing tool or failed listing fetch,
#              2 on a usage error.
#
# Provenance: scripts/reddit-scrape (mode 100755), added by commit
# 01ced0b7ce47d279789efb2dc70d1cd009ac56ad, "initial commit",
# davidovski, Sat, 9 Oct 2021 22:20:41 +0100.

#cfg
useragent="Love by u/gadelat"
timeout=60

# warn MESSAGE... - print a diagnostic to stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# fetch URL - write the body of URL to stdout; non-zero status on failure.
fetch() {
  wget -T "$timeout" -U "$useragent" -q -O - "$1"
}

# url_extension URL - print the file extension of URL.
# The query string is removed first so that a dot inside it cannot be
# mistaken for the extension separator.
url_extension() {
  _ue_path=${1%%\?*}
  printf '%s\n' "${_ue_path##*.}"
}

# sanitize_title TITLE - print TITLE made usable as a single path component:
# one leading "/" is dropped and every remaining "/" becomes a space.
sanitize_title() {
  printf '%s\n' "$1" | sed 's|^/||; s|/| |g'
}

# list_images - read one listing JSON document on stdin and print a line
# "<id><TAB><image url><TAB><title>" for each image post.
# Tabs, carriage returns and newlines inside a title are collapsed to a
# space so a record always stays on one line; posts without a preview URL
# are skipped because there is nothing to download for them.
list_images() {
  jq -r '
    .data.children[].data
    | select(.post_hint | test("image")?)
    | select(.preview.images[0].source.url != null)
    | [ .id,
        .preview.images[0].source.url,
        ((.title // "") | gsub("[\\t\\r\\n]+"; " ")) ]
    | join("\t")
  '
}

# main SUBREDDIT [SORT] [TOP_TIME] - walk the listing and download images.
# Uses `return` rather than `exit` so the status of the final `main "$@"`
# line becomes the status of the script.
main() {
  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    warn "Usage: ${0##*/} SUBREDDIT [SORT] [TOP_TIME]"
    return 2
  fi

  subreddit=$1
  listing=${2:-hot}
  top_time=${3:-}

  for tool in wget jq; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      warn "${0##*/}: required tool not found: $tool"
      return 1
    fi
  done

  mkdir -p -- "$subreddit" || return 1

  tab=$(printf '\t')
  status=0
  base="https://www.reddit.com/r/$subreddit/$listing/.json"
  url="$base?raw_json=1&t=$top_time"

  while :; do
    if ! content=$(fetch "$url"); then
      warn "${0##*/}: failed to fetch $url"
      status=1
      break
    fi

    wait # prevent spawning too many processes

    # The here-document keeps the loop in the current shell (a pipeline
    # would run it in a subshell), so `wait` can see the background jobs.
    while IFS=$tab read -r id image title; do
      # An empty listing still yields one blank here-document line.
      [ -n "$id" ] || continue
      ext=$(url_extension "$image")
      newname="$(sanitize_title "$title")_${subreddit}_$id.$ext"
      printf '%s\n' "$title"
      wget -T "$timeout" -U "$useragent" -nv -nc \
        -O "$subreddit/$newname" "$image" >/dev/null 2>&1 &
    done <<EOF
$(printf '%s\n' "$content" | list_images)
EOF

    after=$(printf '%s\n' "$content" | jq -r '.data.after // empty')
    [ -n "$after" ] || break
    url="$base?count=200&after=$after&raw_json=1&t=$top_time"
  done

  # Let the downloads of the last page finish before the script exits.
  wait
  return "$status"
}

main "$@"