| 1 | |
|---|
| 2 | |
|---|
| 3 | |
|---|
| 4 | """ |
|---|
| 5 | Catches URLs on channel and gives information about them like title, image size, etc. |
|---|
| 6 | Uses http://whatisthisfile.appspot.com/ via XMLRPC |
|---|
| 7 | |
|---|
| 8 | Example: |
|---|
| 9 | 19:20 <@raspi> http://www.youtube.com/watch?v=9RZ-hYPAMFQ |
|---|
| 10 | 19:20 <@bot> Title: "YouTube - Black Knight Holy Grail" |
|---|
| 11 | 19:28 <@raspi> test http://www.raspi.fi foobar http://raspi.fi/wp-includes/images/rss.png |
|---|
| 12 | 19:28 <@bot> 1. Title: "raspi.fi" Redirect: http://raspi.fi/ 2. Image: 14x14 |
|---|
| 13 | """ |
|---|
| 14 | |
|---|
| 15 | __author__ = u"Pekka 'raspi' JÀrvinen - http://raspi.fi/" |
|---|
| 16 | __license__ = 'BSD' |
|---|
| 17 | |
|---|
| 18 | from gozerbot.generic import handle_exception, rlog |
|---|
| 19 | from gozerbot.callbacks import callbacks |
|---|
| 20 | from gozerbot.commands import cmnds |
|---|
| 21 | from gozerbot.plughelp import plughelp |
|---|
| 22 | from gozerbot.persist.persist import Persist |
|---|
| 23 | from gozerbot.examples import examples |
|---|
| 24 | |
|---|
| 25 | |
|---|
| 26 | import re |
|---|
| 27 | import urlparse |
|---|
| 28 | import xmlrpclib |
|---|
| 29 | import socket |
|---|
| 30 | |
|---|
# Attach a one-line description to the 'urlinfo' plugin name
# (presumably shown by the bot's help system -- confirm against plughelp).
plughelp.add('urlinfo', 'Gets information about URLs spoken on channel')

# Plugin state object, created with an empty dict as its initial value.
# The handlers in this file use it as cfg.data[bot.name][ievent.printto] -> bool
# and call cfg.save() after changing it.
cfg = Persist('urlinfo', {})
|---|
| 34 | |
|---|
| 35 | |
|---|
| 36 | |
|---|
def sanitize(text):
    """Reduce a chat line to a space-separated string of URL candidates.

    A whitespace-separated token is kept only when it is at least five
    characters long and contains 'www.' or 'http'.  A kept token that is
    wrapped in a matching pair of brackets -- (), [], <> or {} -- has that
    pair removed, and a token that then starts with 'www.' gets 'http://'
    prepended.

    Returns the surviving tokens joined by single spaces, or '' when the
    line holds no candidate.
    """
    closing_for = {'(': ')', '[': ']', '<': '>', '{': '}'}
    candidates = []

    # split() with no argument splits on any run of whitespace and never
    # yields empty tokens, so a line without candidates simply produces an
    # empty list instead of an empty token that cannot be indexed.
    for token in text.split():
        if len(token) < 5:
            continue
        if 'www.' not in token and 'http' not in token:
            continue

        # Tokens are at least five characters long here, so indexing the
        # first/last character and slicing both off is always safe.
        if closing_for.get(token[0]) == token[-1]:
            token = token[1:-1]

        if token.startswith('www.'):
            token = 'http://' + token

        candidates.append(token)

    return ' '.join(candidates)
|---|
| 73 | |
|---|
| 74 | |
|---|
def getUrls(text):
    """Return the distinct http/https URLs found in *text*, in first-seen order.

    The text is split on single spaces and each piece is scanned with the
    URL pattern.  Every match is passed through urlparse and rebuilt with
    geturl() before being compared against the URLs collected so far.
    """
    pattern = re.compile(r"http[s]?://[-A-Za-z0-9+&@#/%?=~_()|!:,.;]*[-A-Za-z0-9+&@#/%=~_()|]")
    found = []

    for word in text.split(' '):
        for match in pattern.findall(word):
            rebuilt = urlparse.urlparse(match).geturl()
            if rebuilt in found:
                continue
            found.append(rebuilt)

    return found
|---|
| 87 | |
|---|
| 88 | |
|---|
def getUrlInfo(text):
    """Describe every URL in *text* via the whatisthisfile XML-RPC service.

    The text is run through sanitize() and getUrls(); each resulting URL is
    sent to the service's app.query method.  From the answer this reports
    the HTML title or, failing an 'html' entry, the image dimensions, plus
    the 'real_url' when it differs from the URL that was asked about.
    When more than one URL was found, each described URL is prefixed with a
    running number ("1. ", "2. ", ...).

    Lookups are best effort: a failure for one URL is logged and that URL
    is skipped, the remaining URLs are still processed.

    Returns the combined description, or '' when there is nothing to say.
    """
    urls = getUrls(sanitize(text))
    if not urls:
        return ''

    # Loop-invariant setup.  NOTE: setdefaulttimeout() changes the default
    # for every socket created afterwards in this process, not only for the
    # XML-RPC connection.
    socket.setdefaulttimeout(30)
    server = xmlrpclib.ServerProxy("http://whatisthisfile.appspot.com/xmlrpc")

    numbered = len(urls) > 1
    out = ''
    idx = 1

    for url in urls:
        o = ''
        try:
            rlog(10, 'urlinfo', "XMLRPC query: %s" % url)
            urlinfo = server.app.query(url)

            if 'html' in urlinfo:
                if 'title' in urlinfo['html']:
                    o += 'Title: "%s" ' % urlinfo['html']['title'].strip()
            elif 'image' in urlinfo:
                o += 'Image: %dx%d ' % (urlinfo['image']['width'], urlinfo['image']['height'])

            if 'real_url' in urlinfo and urlinfo['real_url'] != url:
                o += 'Redirect: %s ' % (urlinfo['real_url'])
        except Exception:
            # Keep the lookup best effort, but leave a trace instead of
            # dropping the error silently.
            rlog(10, 'urlinfo', "XMLRPC query failed: %s" % url)
            # NOTE(review): handle_exception is gozerbot's exception
            # reporter; calling it without arguments is assumed to log the
            # active traceback -- confirm against gozerbot.generic.
            handle_exception()
            continue

        if o:
            if numbered:
                out += ' ' + str(idx) + '. '
                idx += 1
            out += o

    return out.strip()
|---|
| 125 | |
|---|
| 126 | |
|---|
def catchHasUrls(bot, ievent):
    """Pre-condition for the URL callback: return 1 to handle the event, else 0.

    The event qualifies when cfg.data holds a true value for this bot name
    and this event's printto target, and the message text is at least five
    characters long and contains 'www.' or 'http'.
    """
    if bot.name not in cfg.data:
        return 0

    per_target = cfg.data[bot.name]
    if ievent.printto not in per_target or not per_target[ievent.printto]:
        return 0

    message = ievent.txt
    if len(message) < 5:
        return 0

    if 'www.' in message or 'http' in message:
        return 1
    return 0
|---|
| 133 | |
|---|
| 134 | |
|---|
def catchUrls(bot, ievent):
    """Reply to the event with whatever getUrlInfo() reports for its text.

    getUrlInfo() returns '' when no URL could be described (no usable URL
    in the text, or every lookup failed); in that case nothing is sent
    rather than replying with an empty message.
    """
    info = getUrlInfo(ievent.txt)
    if info:
        ievent.reply(info)


# Run catchUrls for PRIVMSG events that catchHasUrls accepts.
callbacks.add('PRIVMSG', catchUrls, catchHasUrls, threaded=True)
|---|
| 139 | |
|---|
| 140 | |
|---|
def handle_urlinfo_enable(bot, ievent):
    """Switch urlinfo on for the event's printto target and confirm it.

    Stores True under cfg.data[bot.name][ievent.printto], creating the
    per-bot dict when it does not exist yet, then saves the configuration.
    """
    per_target = cfg.data.setdefault(bot.name, {})
    per_target[ievent.printto] = True
    cfg.save()
    ievent.reply('urlinfo enabled')


# Registered with 'OPER' as third argument (presumably the required
# permission -- confirm against gozerbot.commands).
cmnds.add('urlinfo-enable', handle_urlinfo_enable, 'OPER')
examples.add('urlinfo-enable', 'enable urlinfo in the channel', 'urlinfo-enable')
|---|
| 150 | |
|---|
| 151 | |
|---|
def handle_urlinfo_disable(bot, ievent):
    """Switch urlinfo off for the event's printto target and confirm it.

    When cfg.data has an entry for this bot, the target is set to False and
    the configuration is saved.  The confirmation is sent either way.
    """
    if bot.name in cfg.data:
        per_target = cfg.data[bot.name]
        per_target[ievent.printto] = False
        cfg.save()

    ievent.reply('urlinfo disabled')


# Registered with 'OPER' as third argument (presumably the required
# permission -- confirm against gozerbot.commands).
cmnds.add('urlinfo-disable', handle_urlinfo_disable, 'OPER')
examples.add('urlinfo-disable', 'disable urlinfo in the channel', 'urlinfo-disable')
|---|
| 160 | |
|---|
def handle_urlinfo_list(bot, ievent):
    """Reply with the targets on which urlinfo is currently enabled.

    cfg.data maps bot name -> {target: bool}.  urlinfo-disable leaves a
    target in that dict with the value False, so only targets whose value
    is true are listed, and a bot without any enabled target is left out.
    Bot names and targets are sorted.  Replies 'none' when nothing is
    enabled anywhere.
    """
    chans = []

    for name in sorted(cfg.data.keys()):
        targets = sorted(
            target for target, enabled in cfg.data[name].items() if enabled
        )
        if targets:
            chans.append('%s: %s' % (name, ' '.join(targets)))

    if not chans:
        ievent.reply('none')
    else:
        ievent.reply('urlinfo enabled on channels: %s' % ', '.join(chans))


# Registered with 'OPER' as third argument (presumably the required
# permission -- confirm against gozerbot.commands).
cmnds.add('urlinfo-list', handle_urlinfo_list, 'OPER')
examples.add('urlinfo-list', 'show in which channels urlinfo is enabled', 'urlinfo-list')
|---|