summary refs log tree commit diff
path: root/scripts/.scripts/reddit-scrape
diff options
context:
space:
mode:
author davidovski <david@davidovski.xyz> 2021-10-09 22:20:41 +0100
committer davidovski <david@davidovski.xyz> 2021-10-09 22:20:41 +0100
commit 01ced0b7ce47d279789efb2dc70d1cd009ac56ad (patch)
tree 6ece604b8ae3476d2d70c9c9d42f86fe607990da /scripts/.scripts/reddit-scrape
initial commit
Diffstat (limited to 'scripts/.scripts/reddit-scrape')
-rwxr-xr-x scripts/.scripts/reddit-scrape 43
1 files changed, 43 insertions, 0 deletions
diff --git a/scripts/.scripts/reddit-scrape b/scripts/.scripts/reddit-scrape
new file mode 100755
index 0000000..6ea7f72
--- /dev/null
+++ b/scripts/.scripts/reddit-scrape
@@ -0,0 +1,43 @@
+#!/bin/sh
+# Usage: reddit-scrape SUBREDDIT [SORT] [TOP_TIME]
+# Downloads the image posts of a subreddit listing into ./SUBREDDIT/ as
+# <title>_<subreddit>_<id>.<ext>, following the "after" cursor page by page.
+# SORT defaults to "hot"; TOP_TIME is sent as the "t" query parameter.
+# Needs wget and jq.
+#cfg
+useragent="Love by u/gadelat"
+timeout=60
+
+subreddit=$1
+sort=${2:-hot}
+top_time=${3:-}
+if [ -z "$subreddit" ]; then
+ printf 'usage: %s SUBREDDIT [SORT] [TOP_TIME]\n' "${0##*/}" >&2
+ exit 2
+fi
+# One "id url title" line per image post. Whitespace runs in the title are
+# collapsed so a multi-line title cannot push later posts out of step.
+posts='.data.children[]|select(.data.post_hint|test("image")?) | "\(.data.id) \(.data.preview.images[0].source.url) \(.data.title // "" | gsub("\\s+";" "))"'
+
+url="https://www.reddit.com/r/$subreddit/$sort/.json?raw_json=1&t=$top_time"
+content=$(wget -T "$timeout" -U "$useragent" -q -O - "$url")
+mkdir -p "$subreddit" || exit 1
+while : ; do
+ wait # prevent spawning too many processes
+ while read -r id url name; do
+  [ -n "$id" ] || continue # a page without images yields one blank line
+  ext=${url##*.}
+  ext=${ext%%\?*} # cut the query string that follows the extension
+  newname=$(printf '%s' "$name" | sed 's|^/||;s|/| |g')_${subreddit}_$id.$ext
+  printf '%s\n' "$name"
+  # POSIX redirection: "&>" is a bashism that plain sh reads as "&" then ">".
+  wget -T "$timeout" -U "$useragent" --no-check-certificate -nv -nc -P down -O "$subreddit/$newname" "$url" >/dev/null 2>&1 &
+ done <<EOF
+$(printf '%s' "$content" | jq -r "$posts")
+EOF
+ after=$(printf '%s' "$content" | jq -r '.data.after//empty')
+ [ -n "$after" ] || break
+ url="https://www.reddit.com/r/$subreddit/$sort/.json?count=200&after=$after&raw_json=1&t=$top_time"
+ content=$(wget -T "$timeout" -U "$useragent" --no-check-certificate -q -O - "$url")
+done
+wait # keep the script alive until the last page has finished downloading