author     davidovski <david@davidovski.xyz>  2021-10-09 22:58:10 +0100
committer  davidovski <david@davidovski.xyz>  2021-10-09 22:58:10 +0100
commit     49fff5a029e08f31941c75c19a72d7e7eb1ed172 (patch)
tree       9cab74542e7302f47a9b8efbe5b1c674f9eb2150 /scripts/.scripts/reddit-scrape
parent     26d3e9c0db0b8591eba24de5437b3c75d9995064 (diff)
fixed scripts
Diffstat (limited to 'scripts/.scripts/reddit-scrape')
-rwxr-xr-x  scripts/.scripts/reddit-scrape | 43
1 file changed, 0 insertions, 43 deletions
diff --git a/scripts/.scripts/reddit-scrape b/scripts/.scripts/reddit-scrape
deleted file mode 100755
index 6ea7f72..0000000
--- a/scripts/.scripts/reddit-scrape
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/sh
-
-#cfg
-useragent="Love by u/gadelat"
-timeout=60
-
-subreddit=$1
-sort=$2
-top_time=$3
-
-if [ -z $sort ]; then
- sort="hot"
-fi
-
-if [ -z $top_time ];then
- top_time=""
-fi
-
-url="https://www.reddit.com/r/$subreddit/$sort/.json?raw_json=1&t=$top_time"
-content=`wget -T $timeout -U "$useragent" -q -O - $url`
-mkdir -p $subreddit
-while : ; do
- urls=$(echo -n "$content"| jq -r '.data.children[]|select(.data.post_hint|test("image")?) | .data.preview.images[0].source.url')
- names=$(echo -n "$content"| jq -r '.data.children[]|select(.data.post_hint|test("image")?) | .data.title')
- ids=$(echo -n "$content"| jq -r '.data.children[]|select(.data.post_hint|test("image")?) | .data.id')
- a=1
- wait # prevent spawning too many processes
- for url in $urls; do
- name=`echo -n "$names"|sed -n "$a"p`
- id=`echo -n "$ids"|sed -n "$a"p`
- ext=`echo -n "${url##*.}"|cut -d '?' -f 1`
- newname=`echo $name | sed "s/^\///;s/\// /g"`_"$subreddit"_$id.$ext
- echo $name
- wget -T $timeout -U "$useragent" --no-check-certificate -nv -nc -P down -O "$subreddit/$newname" $url &>/dev/null &
- a=$(($a+1))
- done
- after=$(echo -n "$content"| jq -r '.data.after//empty')
- if [ -z $after ]; then
- break
- fi
- url="https://www.reddit.com/r/$subreddit/$sort/.json?count=200&after=$after&raw_json=1&t=$top_time"
- content=`wget -T $timeout -U "$useragent" --no-check-certificate -q -O - $url`
-done
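For reference, the deleted script took a subreddit name (plus an optional sort order and time range), fetched the subreddit's JSON listing with wget, used jq to pull the preview image URL, post id and title out of each image post, downloaded the images into a directory named after the subreddit, and followed the listing's "after" cursor until no further page remained. Below is a minimal sketch of the same approach, with the unquoted variable expansions quoted, the title/id/url line-number matching replaced by a single tab-separated jq query, and the conflicting wget -nc/-O combination avoided. The user agent string is a placeholder, the filename scheme only approximates the original, and this is not the replacement the commit introduces; the commit simply deletes the file.

#!/bin/sh
# Sketch: download image posts from a subreddit's public JSON listing.
# Requires wget and jq; relies on Reddit's .json listing endpoints and the
# "after" pagination cursor behaving as they did for the original script.

useragent="reddit-scrape-example/0.1"   # placeholder UA, not the original's
timeout=60

subreddit=$1
sort=${2:-hot}      # default sort order, as in the original script
top_time=${3:-}     # e.g. "day" or "week"; only meaningful when sort=top

[ -n "$subreddit" ] || { echo "usage: $0 subreddit [sort] [top_time]" >&2; exit 1; }

mkdir -p "$subreddit"
url="https://www.reddit.com/r/$subreddit/$sort/.json?raw_json=1&t=$top_time"

while : ; do
    content=$(wget -T "$timeout" -U "$useragent" -q -O - "$url") || break

    # Emit one "url<TAB>id<TAB>title" line per image post so the three
    # fields stay aligned, instead of matching them up by line number.
    printf '%s' "$content" |
        jq -r '.data.children[]
               | select(.data.post_hint == "image")
               | [.data.preview.images[0].source.url, .data.id, .data.title]
               | @tsv' |
    while IFS="$(printf '\t')" read -r img id title; do
        ext=${img##*.}
        ext=${ext%%\?*}                            # drop any query string from the extension
        name=$(printf '%s' "$title" | tr '/' ' ')  # slashes would break the output path
        out="$subreddit/${name}_${subreddit}_${id}.${ext}"
        [ -e "$out" ] || wget -T "$timeout" -U "$useragent" -q -O "$out" "$img"
    done

    # Follow the pagination cursor; stop when the listing has no next page.
    after=$(printf '%s' "$content" | jq -r '.data.after // empty')
    [ -n "$after" ] || break
    url="https://www.reddit.com/r/$subreddit/$sort/.json?count=200&after=$after&raw_json=1&t=$top_time"
done

Invoked the same way as the original, e.g. ./reddit-scrape wallpapers top week to grab the week's top image posts from r/wallpapers.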