Created
January 1, 2026 11:33
-
-
Save YellowOnion/49f103a1ca4ba912aaad83f70cc26796 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| {-# LANGUAGE OverloadedStrings #-} | |
| {-# LANGUAGE RecordWildCards #-} | |
| module Main where | |
| import qualified Data.ByteString.Lazy as LBS | |
| import qualified Text.Megaparsec as P | |
| import qualified Text.Megaparsec.Char as P | |
| import qualified Data.Text as T | |
| import qualified Data.Text.IO as T | |
| import Control.Applicative | |
| import Control.Monad | |
| import Data.Void | |
| import Data.Maybe | |
| import Data.Char | |
| import Data.Foldable.WithIndex | |
| import Network.HTTP.Simple | |
| import Numeric (readHex) | |
| import Data.Functor (void) | |
| hEADERS = [ ("User-Agent", "YellowOnion/nixos-config/abyss") ] | |
| type Parser = P.Parsec Void T.Text | |
| newtype Host = Host T.Text deriving (Eq, Show) | |
| skipTillEoL :: Parser () | |
| skipTillEoL = P.skipMany (P.satisfy $ (/='\n')) | |
| -- | Parser Element type | |
| -- | |
| -- Examples: | |
| -- >>> P.parseMaybe elementType "$doc" | |
| -- Just "doc" | |
| -- | |
| -- >>> P.parseMaybe elementType "$all" | |
| -- Just "all" | |
| elementType :: Parser T.Text | |
| elementType = P.string "$" | |
| *> ( P.string "doc" | |
| <|> P.string "document" | |
| <|> P.string "all" | |
| <|> P.takeWhile1P Nothing (\x -> x /= ',' && (isLower x || isDigit x)) | |
| ) | |
| -- | Parse Element types | |
| -- Examples: | |
| -- >>> P.parseMaybe elements "$doc,$all" | |
| -- Just ["doc","all"] | |
| -- | |
| -- >>> P.parseMaybe elements "$doc,$3p" | |
| -- Just ["doc","3p"] | |
| -- | |
| -- >>> P.parseMaybe elements "" | |
| -- Just [] | |
| elements :: Parser [T.Text] | |
| elements = elementType `P.sepBy` "," | |
| -- | Parse an IPv4 address | |
| -- Examples | |
| -- >>> P.parseMaybe ipv4 "127.0.0.1" | |
| -- Just [127,0,0,1] | |
| -- | |
| -- >>> P.parseMaybe ipv4 "0.0.0.0" | |
| -- Just [0,0,0,0] | |
| -- | |
| -- >>> P.parseMaybe ipv4 "0.0" | |
| -- Nothing | |
| ipv4 :: Parser [Int] | |
| ipv4 = do | |
| w <- num | |
| P.char '.' | |
| x <- num | |
| P.char '.' | |
| y <- num | |
| P.char '.' | |
| z <- num | |
| pure [w, x, y , z] | |
| where num = read <$> P.some P.digitChar | |
| -- | Parse ipv6 address (badly) | |
| -- Examples: | |
| -- >>> P.parseMaybe ipv6 "d53f:96a8:7401:e687:60fd:4cb6:444e:4ac8" | |
| -- Just [54591,38568,29697,59015,24829,19638,17486,19144] | |
| -- | |
| -- >>> P.parseMaybe ipv6 "0::1" | |
| -- Just [0,1] | |
| -- | |
| -- >>> P.parseMaybe ipv6 "::1" | |
| -- Just [1] | |
| ipv6 :: Parser [Int] | |
| ipv6 = P.option () (void sep) *> hexNum `P.sepBy1` sep | |
| where | |
| sep = P.string "::" <|> P.string ":" | |
| hexNum = toHex <$> P.some P.hexDigitChar | |
| toHex = ifoldl' (\i x y -> x + y * 16^i ) 0 . reverse . fmap digitToInt | |
| -- | Parse host | |
| -- Examples: | |
| -- >>> P.parseMaybe matchHost "host.com" | |
| -- Just "host.com" | |
| -- | |
| -- >>> P.parseMaybe matchHost "10.0.0.1" | |
| -- Nothing | |
| -- | |
| -- >>> P.parseMaybe matchHost "00.ishost.com" | |
| -- Just "00.ishost.com" | |
| matchHost :: Parser T.Text | |
| matchHost = do | |
| heads <- parts | |
| last <- P.takeWhile1P (Just "host-string-end") (\x -> isAlpha x && x /= '^') | |
| pure $ heads <> last | |
| where | |
| parts = mconcat <$> P.some (P.try part) | |
| part = (<>) <$> P.takeWhile1P (Just "host-string") isAlphaNum <*> P.string "." | |
| -- | Parse line | |
| -- Examples: | |
| -- >>> P.parseMaybe line "::0 host.com #this is a comment" | |
| -- Just (Just (Host "host.com")) | |
| -- | |
| -- >>> P.parseMaybe line "||host.com^$doc" | |
| -- Just (Just (Host "host.com")) | |
| line :: Parser (Maybe Host) | |
| line = emptyLine | |
| <|> comment | |
| <|> P.try adbpHost | |
| <|> P.try unixHost | |
| <|> (skipTillEoL *> pure Nothing) | |
| where | |
| emptyLine = P.lookAhead P.eol *> pure Nothing | |
| comment = (P.string "!" <|> P.string "#" ) *> skipTillEoL *> pure Nothing | |
| -- | Adblock Plus host line | |
| adbpHost = do | |
| _ <- P.string "||" | |
| host <- matchHost | |
| _ <- P.string "^" | |
| e <- elements | |
| skipTillEoL | |
| if all (`elem` ["doc", "document", "all"]) e || e == [] | |
| then pure . Just . Host $ host | |
| else empty | |
| -- unit host files style line | |
| unixHost = do | |
| ip <- ipv4 <|> ipv6 | |
| P.hspace1 | |
| host <- matchHost | |
| skipTillEoL | |
| pure . Just . Host $ host | |
| adblockParser :: Parser [Host] | |
| adblockParser = do | |
| r <- (line `P.sepBy` (P.newline)) | |
| _ <- P.eof | |
| pure (catMaybes r) | |
| main :: IO () | |
| main = do | |
| Just list <- P.parseMaybe adblockParser <$> T.getContents | |
| forM_ list $ putStrLn . show | |
| putStrLn . show . length $ list |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment