Skip to content

Instantly share code, notes, and snippets.

@YellowOnion
Created January 1, 2026 11:33
Show Gist options
  • Select an option

  • Save YellowOnion/49f103a1ca4ba912aaad83f70cc26796 to your computer and use it in GitHub Desktop.

Select an option

Save YellowOnion/49f103a1ca4ba912aaad83f70cc26796 to your computer and use it in GitHub Desktop.
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Main where
import qualified Data.ByteString.Lazy as LBS
import qualified Text.Megaparsec as P
import qualified Text.Megaparsec.Char as P
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Control.Applicative
import Control.Monad
import Data.Void
import Data.Maybe
import Data.Char
import Data.Foldable.WithIndex
import Network.HTTP.Simple
import Numeric (readHex)
import Data.Functor (void)
hEADERS = [ ("User-Agent", "YellowOnion/nixos-config/abyss") ]
type Parser = P.Parsec Void T.Text
newtype Host = Host T.Text deriving (Eq, Show)
skipTillEoL :: Parser ()
skipTillEoL = P.skipMany (P.satisfy $ (/='\n'))
-- | Parser Element type
--
-- Examples:
-- >>> P.parseMaybe elementType "$doc"
-- Just "doc"
--
-- >>> P.parseMaybe elementType "$all"
-- Just "all"
elementType :: Parser T.Text
elementType = P.string "$"
*> ( P.string "doc"
<|> P.string "document"
<|> P.string "all"
<|> P.takeWhile1P Nothing (\x -> x /= ',' && (isLower x || isDigit x))
)
-- | Parse Element types
-- Examples:
-- >>> P.parseMaybe elements "$doc,$all"
-- Just ["doc","all"]
--
-- >>> P.parseMaybe elements "$doc,$3p"
-- Just ["doc","3p"]
--
-- >>> P.parseMaybe elements ""
-- Just []
elements :: Parser [T.Text]
elements = elementType `P.sepBy` ","
-- | Parse an IPv4 address
-- Examples
-- >>> P.parseMaybe ipv4 "127.0.0.1"
-- Just [127,0,0,1]
--
-- >>> P.parseMaybe ipv4 "0.0.0.0"
-- Just [0,0,0,0]
--
-- >>> P.parseMaybe ipv4 "0.0"
-- Nothing
ipv4 :: Parser [Int]
ipv4 = do
w <- num
P.char '.'
x <- num
P.char '.'
y <- num
P.char '.'
z <- num
pure [w, x, y , z]
where num = read <$> P.some P.digitChar
-- | Parse ipv6 address (badly)
-- Examples:
-- >>> P.parseMaybe ipv6 "d53f:96a8:7401:e687:60fd:4cb6:444e:4ac8"
-- Just [54591,38568,29697,59015,24829,19638,17486,19144]
--
-- >>> P.parseMaybe ipv6 "0::1"
-- Just [0,1]
--
-- >>> P.parseMaybe ipv6 "::1"
-- Just [1]
ipv6 :: Parser [Int]
ipv6 = P.option () (void sep) *> hexNum `P.sepBy1` sep
where
sep = P.string "::" <|> P.string ":"
hexNum = toHex <$> P.some P.hexDigitChar
toHex = ifoldl' (\i x y -> x + y * 16^i ) 0 . reverse . fmap digitToInt
-- | Parse host
-- Examples:
-- >>> P.parseMaybe matchHost "host.com"
-- Just "host.com"
--
-- >>> P.parseMaybe matchHost "10.0.0.1"
-- Nothing
--
-- >>> P.parseMaybe matchHost "00.ishost.com"
-- Just "00.ishost.com"
matchHost :: Parser T.Text
matchHost = do
heads <- parts
last <- P.takeWhile1P (Just "host-string-end") (\x -> isAlpha x && x /= '^')
pure $ heads <> last
where
parts = mconcat <$> P.some (P.try part)
part = (<>) <$> P.takeWhile1P (Just "host-string") isAlphaNum <*> P.string "."
-- | Parse line
-- Examples:
-- >>> P.parseMaybe line "::0 host.com #this is a comment"
-- Just (Just (Host "host.com"))
--
-- >>> P.parseMaybe line "||host.com^$doc"
-- Just (Just (Host "host.com"))
line :: Parser (Maybe Host)
line = emptyLine
<|> comment
<|> P.try adbpHost
<|> P.try unixHost
<|> (skipTillEoL *> pure Nothing)
where
emptyLine = P.lookAhead P.eol *> pure Nothing
comment = (P.string "!" <|> P.string "#" ) *> skipTillEoL *> pure Nothing
-- | Adblock Plus host line
adbpHost = do
_ <- P.string "||"
host <- matchHost
_ <- P.string "^"
e <- elements
skipTillEoL
if all (`elem` ["doc", "document", "all"]) e || e == []
then pure . Just . Host $ host
else empty
-- unit host files style line
unixHost = do
ip <- ipv4 <|> ipv6
P.hspace1
host <- matchHost
skipTillEoL
pure . Just . Host $ host
adblockParser :: Parser [Host]
adblockParser = do
r <- (line `P.sepBy` (P.newline))
_ <- P.eof
pure (catMaybes r)
main :: IO ()
main = do
Just list <- P.parseMaybe adblockParser <$> T.getContents
forM_ list $ putStrLn . show
putStrLn . show . length $ list
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment