Last active
June 14, 2016 12:56
-
-
Save felixbuenemann/583c38e12a2132f6a486 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- strip_md(text) - strips markdown from text
-- (c) 2014 Felix Buenemann https://github.com/felixbuenemann
-- License: Public Domain
--
-- Purpose:
--   Removes common Markdown markup from `input` with a fixed sequence of
--   regexp_replace() passes and returns the remaining plain text.
--
-- Parameters:
--   input  text to clean; a NULL input yields NULL because every
--          regexp_replace() call returns NULL for a NULL source.
--
-- Returns:
--   text with link definitions, HTML tags, horizontal rules, list leaders,
--   blockquote markers, header markers, images, strong/emphasis/strikethrough
--   markers, :"quotes": and code delimiters removed, carriage returns dropped
--   and runs of two or more newlines collapsed to exactly two.
--   Link bodies are kept; link URLs are dropped.
--
-- Notes:
--   * The order of the passes matters; see the comments on individual steps.
--   * Patterns are plain '...' literals containing backslashes.
--     NOTE(review): this assumes standard_conforming_strings = on - confirm
--     for the target server.
--   * PostgreSQL decides greediness per regular expression from the first
--     quantified atom that has a greediness attribute. Patterns that must
--     stop at the nearest closing delimiter are therefore written so that
--     their first quantifier is a non-greedy one.
--   * Emphasis markers are stripped before code delimiters, so `*` and `_`
--     inside code spans are treated as markup as well.
CREATE OR REPLACE FUNCTION strip_md(input text) RETURNS text AS $$
BEGIN
  -- strip link definitions: whole lines of the form  [id]: url "optional title"
  -- (done before html tags are stripped so that <url> targets are still present)
  input := regexp_replace(input, '^ {0,3}\[[^\]]+\]:[ \t]*\S+.*$', '', 'ng');
  -- strip html tags
  input := regexp_replace(input, '<[^>]+>', '', 'g');
  -- strip horizontal rules
  -- (must run before list leaders, which would otherwise eat the first
  -- "- " or "* " of "- - -" / "* * *" and leave a two-element remainder)
  input := regexp_replace(input, '^((- ?){3,}|(\* ?){3,})\s*$', '', 'ng');
  -- strip list leaders and blockquotes (\d+ so that "10." and up are covered)
  input := regexp_replace(input, '^([\s]*)([\*\-\+]|\d+\.|>)\s+', '\1', 'ng');
  -- strip setext header underlines
  input := regexp_replace(input, '^[=\-]{2,}\s*$', '', 'ng');
  -- strip atx header hash chars
  input := regexp_replace(input, '^#{1,6}\s*', '', 'ng');
  -- strip embeds (images)
  input := regexp_replace(input, '\!\[([^\[]+)\](\[\]|\([^\)]+\))', '', 'g');
  -- strip links, keep body
  input := regexp_replace(input, '\[([^\[]+)\](\[\]|\([^\)]+\))', '\1', 'g');
  -- strip strong
  -- The body is "\S" optionally followed by ".*?\S", so one-character bodies
  -- such as **b** match too; "??" keeps the whole expression non-greedy.
  input := regexp_replace(input, '(\*\*)(\S(?:.*?\S)??)\1', '\2', 'g');
  input := regexp_replace(input, '(__)(\S(?:.*?\S)??)\1', '\2', 'g');
  -- strip emphasis
  input := regexp_replace(input, '(\*)(\S(?:.*?\S)??)\1', '\2', 'g');
  input := regexp_replace(input, '(_)(\S(?:.*?\S)??)\1', '\2', 'g');
  -- strip strikethrough
  input := regexp_replace(input, '(~~)(\S(?:.*?\S)??)\1', '\2', 'g');
  -- strip quotes
  input := regexp_replace(input, ':"(.*?)":', '\1', 'g');
  -- strip fenced code blocks
  -- (must run before inline code, which would otherwise consume the fence
  -- backticks pairwise; "{3,}?" makes the expression non-greedy so that each
  -- block ends at its own closing fence instead of the last fence in the text)
  input := regexp_replace(input, '(`{3,}?)(.*?)\1', '\2', 'g');
  -- strip inline code
  input := regexp_replace(input, '`(.*?)`', '\1', 'g');
  -- remove windows style returns
  input := regexp_replace(input, '\r', '', 'g');
  -- consolidate newlines
  input := regexp_replace(input, '\n{2,}', E'\n\n', 'g');
  RETURN input;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This currently removes the URL from links. That can easily be changed by modifying the regex of the `-- strip links, keep body` step (line 17 of the gist).