Skip to content

Instantly share code, notes, and snippets.

@oscarduignan
Last active November 25, 2024 10:39
Show Gist options
  • Select an option

  • Save oscarduignan/adb5d0276fccbec40d31dcc4bc8f9b33 to your computer and use it in GitHub Desktop.

Select an option

Save oscarduignan/adb5d0276fccbec40d31dcc4bc8f9b33 to your computer and use it in GitHub Desktop.
A what-if sketch of what a tool that gathers info about a collection of local repos could look like, for generating ad hoc reports on some metrics
//> using scala 3.3
//> using toolkit typelevel:0.1.29
//> using dep com.softwaremill.macwire::macros:2.6.4
//> using dep org.scalameta:scalameta_2.13:4.7.7
//> using dep com.lihaoyi::pprint:0.9.0
import com.softwaremill.macwire.*
import cats.effect.*
import cats.syntax.all.*
import scala.concurrent.duration.*
import java.nio.file.Path
import cats.effect.unsafe.IORuntime
import scala.meta.internal.semanticdb.{Locator, MethodSignature, SymbolInformation, TypeRef, ValueSignature}
import scala.io.Source
import scala.meta.internal.semanticdb.SymbolInformation.Kind.METHOD
import scala.collection.concurrent.TrieMap
import scala.util.Using
/** Identifier of a team. */
case class TeamId(value: String)

/** A team: its identifier together with a display name. */
case class Team(
    id: TeamId,
    name: String
)
/** Lookup of teams by identifier. */
trait Teams:
  /** The team registered under `id`, if any. */
  def getTeamById(id: TeamId): Option[Team]

object Teams:
  /** Builds an in-memory [[Teams]] out of the given teams, indexed by their id.
    * When several teams share an id, the one listed last is kept.
    */
  def of(teams: Team*): Teams =
    val teamsById: Map[TeamId, Team] =
      teams.iterator.map(t => (t.id, t)).toMap
    new Teams:
      override def getTeamById(id: TeamId): Option[Team] =
        teamsById.get(id)
/** Provides the `teams` of whichever [[Dependencies]] is in scope as the given [[Teams]]. */
given (using deps: Dependencies): Teams =
  deps.teams
/** Identifier of a repository. */
case class RepoId(value: String)

/** A repository: its identifier, a filesystem path for it, and the id of the team it is
  * associated with.
  */
case class Repository(
    id: RepoId,
    path: Path,
    teamId: TeamId
)
/** Lookup of repositories, either by their own id or by the id of their team. */
trait Repos:
  /** Every repository whose `teamId` is `id`; empty when there is none. */
  def getReposByTeam(id: TeamId): Seq[Repository]

  /** The repository registered under `id`, if any. */
  def getRepoById(id: RepoId): Option[Repository]

object Repos:
  /** Builds an in-memory [[Repos]] out of the given repositories, indexed once up front
    * both by repository id and by team id.
    */
  def of(repos: Repository*): Repos =
    val reposById = repos.iterator.map(r => (r.id, r)).toMap
    val reposByTeam = repos.groupBy(r => r.teamId)
    new Repos:
      override def getReposByTeam(id: TeamId): Seq[Repository] =
        reposByTeam.get(id) match
          case Some(owned) => owned
          case None        => Seq.empty

      override def getRepoById(id: RepoId): Option[Repository] =
        reposById.get(id)
/** Shorthand for the `repos` of the [[Dependencies]] in scope. */
def repos(using deps: Dependencies): Repos =
  deps.repos

/** Provides the `repos` of whichever [[Dependencies]] is in scope as the given [[Repos]]. */
given (using deps: Dependencies): Repos =
  deps.repos
/** Keeps track of the finalizers of resources that were acquired imperatively, so that
  * all of them can be released together when the manager itself is shut down.
  *
  * @param runtime runtime used by [[manage]] to run the acquisition synchronously
  */
class ResourceManager(using runtime: IORuntime):
  // The combined finalizer of everything acquired so far; the most recently acquired
  // resource is released first.
  private val shutdown: Ref[IO, IO[Unit]] = Ref.unsafe(IO.unit)

  /** Acquires `resource` right away, blocking the calling thread until it is available,
    * and registers its finalizer to be run when this manager is shut down.
    *
    * @return the acquired value
    */
  def manage[A](resource: Resource[IO, A]): A =
    resource.allocated
      .flatMap { case (acquired, release) =>
        // `guarantee` rather than `*>`: a finalizer that fails must not prevent the
        // finalizers registered before it from running, otherwise those resources leak.
        shutdown.update(earlier => release.guarantee(earlier)).as(acquired)
      }
      .unsafeRunSync()

  // Runs every registered finalizer once and forgets them.
  private def shutdownAll: IO[Unit] =
    shutdown.getAndSet(IO.unit).flatten

object ResourceManager:
  /** A manager that releases everything handed to its `manage` method when the returned
    * resource is itself released.
    */
  def apply(runtime: IORuntime): Resource[IO, ResourceManager] =
    Resource.make(IO(new ResourceManager(using runtime)))(_.shutdownAll)
/** Runs `block` with a fresh [[ResourceManager]] in (given) scope; everything the block
  * registered with that manager is released once the block's effect has finished.
  */
def usingResourceManager[A](block: ResourceManager ?=> IO[A]): IO[A] =
  val scoped: Resource[IO, ResourceManager] = ResourceManager(IORuntime.global)
  scoped.use(manager => block(using manager))
extension [A](resource: Resource[IO, A])(using withinScope: ResourceManager)
  /** Acquires this resource through the [[ResourceManager]] in scope and returns the
    * acquired value; releasing it is left to that manager.
    */
  def useInScope: A =
    withinScope.manage(resource)
/** Marker for a (fake) connection pool. */
trait ConnectionPool

object ConnectionPool:
  /** A fake pool whose acquisition and release only print a message and sleep, so that
    * the moments at which they happen are visible in the output.
    */
  def create: Resource[IO, ConnectionPool] =
    val open: IO[ConnectionPool] =
      for
        _ <- IO.println("✅Starting a fake connection pool")
        _ <- IO.sleep(1.second)
        pool <- IO(new ConnectionPool {})
      yield pool

    val close: IO[Unit] =
      for
        _ <- IO.println("🔻Closing connection pool")
        _ <- IO.sleep(2.seconds)
        _ <- IO.println("✅Connection pool closed")
      yield ()

    Resource.make(open)(_ => close)
/** Minimal HTTP client abstraction. */
trait HttpClient:
  /** Performs a GET on `url` and yields the response body. */
  def get(url: String): IO[String]

/** Fake implementation: logs the request and answers with a canned body. The connection
  * pool is only taken as a constructor dependency; it is not used by `get`.
  */
class HttpClientImpl(connectionPool: ConnectionPool) extends HttpClient:
  def get(url: String): IO[String] =
    IO.println(s"🛜HTTP GET $url").as(s"response from $url")
/** Small GitHub vocabulary: organisation and repository names as opaque wrappers around
  * `String`, plus the pair of them identifying a repository.
  */
object Github:
  opaque type OrgName = String
  opaque type RepoName = String

  object OrgName:
    /** Wraps a raw organisation name. */
    def apply(value: String): OrgName = value

  object RepoName:
    /** Wraps a raw repository name. */
    def apply(value: String): RepoName = value

  /** A repository on GitHub, identified by organisation and name. */
  case class Repo(org: OrgName, name: RepoName)
/** Client for the bits of GitHub this script cares about. */
trait GithubClient:
  /** The number of likes of `repo`. */
  def getLikes(repo: Github.Repo): IO[GithubLikes]

object GithubClient:
  /** A client that goes through `httpClient`. The HTTP response is currently discarded
    * and a fixed count of 42 is reported.
    */
  def live(httpClient: HttpClient): GithubClient =
    new GithubClient:
      def getLikes(repo: Github.Repo): IO[GithubLikes] =
        for
          _ <- IO.println(s"💾Fetching github likes for $repo")
          _ <- httpClient.get(s"https://github.com/${repo.org}/${repo.name}")
        yield GithubLikes(42)
/** Shorthand for the [[GithubClient]] that is given in the current scope. */
def githubClient(using githubClient: GithubClient): GithubClient =
  githubClient

/** Provides the `githubClient` of whichever [[Dependencies]] is in scope as the given
  * [[GithubClient]].
  */
given (using deps: Dependencies): GithubClient =
  deps.githubClient
/** The object graph of the script. Every member is a `lazy val`, so a dependency is only
  * built the first time something asks for it. Resources acquired along the way are
  * handed to the [[ResourceManager]] in scope.
  */
trait Dependencies(using ResourceManager):
  // ConnectionPool.create is a Resource that needs to acquire/release things;
  // useInScope is the extension method that registers it with the ResourceManager.
  private lazy val connectionPool: ConnectionPool =
    ConnectionPool.create.useInScope

  lazy val httpClient: HttpClient =
    HttpClientImpl(connectionPool) // or wire[HttpClientImpl]

  lazy val githubClient: GithubClient =
    wireWith(GithubClient.live)

  lazy val teams: Teams =
    val platUi = Team(TeamId("platui"), "Plat UI")
    val platOps = Team(TeamId("platops"), "PlatOps")
    Teams.of(platUi, platOps)

  lazy val repos: Repos =
    val platUi = TeamId("platui")
    Repos.of(
      Repository(
        RepoId("contact-frontend"),
        Path.of("local-git-repos/contact-frontend"),
        platUi
      ),
      Repository(
        RepoId("accessibility-statement-frontend"),
        Path.of("local-git-repos/accessibility-statement-frontend"),
        platUi
      ),
      Repository(
        RepoId("help-frontend"),
        Path.of("local-git-repos/help-frontend"),
        platUi
      )
    )
/** [[Dependencies]] with the HTTP client swapped for a fake that never touches the
  * connection pool.
  */
trait TestDependencies extends Dependencies:
  override lazy val httpClient: HttpClient =
    new HttpClient:
      def get(url: String): IO[String] =
        IO.println("Using a fake http client, so you should not see connection pool started")
          .as("fake response")
// what are the benefits of having the cache within the metric vs having
// a shared cache that all metrics use? is it worth it for what I'm doing?
// do I need to have a way of defining how the cache key is calculated?
/** A value of type `A` that can be computed for an entity of type `E`.
  *
  * Results are memoised per entity: concurrent and later calls to [[compute]] for the
  * same entity share a single run of `from`. Failed or cancelled runs are not cached.
  */
trait Metric[E, A]:
  // One slot per entity. A slot holds the outcome (success or failure) of the run that
  // claimed it, so fibers waiting on a slot are always released, whatever happens.
  private val cache = TrieMap.empty[E, Deferred[IO, Either[Throwable, A]]]

  /** Computes the metric for `entity` from scratch, without consulting the cache. */
  protected def from(entity: E)(using Dependencies): IO[A]

  /** The metric for `entity`, computed at most once for as long as that run succeeds.
    *
    * The first caller runs `from`; callers arriving meanwhile wait for its outcome.
    * If that run fails, the waiters fail with the same error; if it is cancelled, they
    * fail with a `CancellationException`. In both cases the entry is evicted so that a
    * later call starts afresh.
    */
  def compute(entity: E)(using Dependencies): IO[A] =
    Deferred[IO, Either[Throwable, A]].flatMap: slot =>
      // Claiming the slot and installing the cleanup must not be interrupted half-way,
      // or a claimed slot could be left behind that nobody will ever complete.
      IO.uncancelable: poll =>
        IO(cache.putIfAbsent(entity, slot)).flatMap:
          case Some(claimed) =>
            poll(claimed.get).flatMap(IO.fromEither)
          case None =>
            // Remove the entry only while it is still ours.
            val evict: IO[Unit] = IO(cache.remove(entity, slot)).void
            def settle(outcome: Either[Throwable, A]): IO[Unit] =
              slot.complete(outcome).void
            def cancelled: Throwable =
              new java.util.concurrent.CancellationException(
                s"computation of the metric for $entity was cancelled"
              )
            // `defer` so that an exception thrown while building the effect is handled
            // like any other failure.
            poll(IO.defer(from(entity)))
              .onCancel(evict *> settle(Left(cancelled)))
              .attempt
              .flatMap:
                case Right(result) =>
                  settle(Right(result)).as(result)
                case Left(error) =>
                  evict *> settle(Left(error)) *> IO.raiseError(error)

object Metric:
  /** Creates a [[Metric]] whose uncached computation is `f`. */
  def apply[E, A](f: E => Dependencies ?=> IO[A]): Metric[E, A] =
    new Metric[E, A]:
      def from(entity: E)(using deps: Dependencies): IO[A] = f(entity)
/** Starts `command` in `directory` as a child process, with its stderr merged into its
  * stdout. On release the process is destroyed and then waited for.
  */
def createProcess(command: List[String], directory: java.io.File): Resource[IO, Process] =
  val launch: IO[Process] = IO.blocking:
    val builder = new ProcessBuilder(command*)
    builder.directory(directory)
    builder.redirectErrorStream(true)
    builder.start()

  def terminate(process: Process): IO[Unit] =
    val kill = IO.blocking(process.destroy())
    val reap = IO.blocking(process.waitFor()).void
    kill.guarantee(reap)

  Resource.make(launch)(terminate)
/** Outcome of compiling a repository with SemanticDB output enabled.
  *
  * @param exitCode exit code of the sbt process that ran the compilation
  */
case class CompiledWithSemanticDB(exitCode: Int) // todo should it save path?

/** Runs `sbt clean compile` with `semanticdbEnabled` set in the repository's directory
  * and reports the exit code of sbt.
  */
given Metric[Repository, CompiledWithSemanticDB] = Metric: repo =>
  createProcess(
    List("sbt", "set semanticdbEnabled := true; clean; compile"),
    repo.path.toFile
  ).use: compilationProcess =>
    val awaitExit = IO.blocking:
      println(s"🏗️compiling with semanticdb: $repo")
      // The child's merged stdout/stderr is a pipe that nothing else reads. Drain it
      // before waiting: once the pipe buffer is full the child blocks on its next
      // write and `waitFor` would never return.
      compilationProcess.getInputStream.transferTo(java.io.OutputStream.nullOutputStream())
      CompiledWithSemanticDB(compilationProcess.waitFor())
    awaitExit <* IO.println("💪compilation completed")
/** Finds where a repository's Twirl templates use play-frontend-hmrc components, based on
  * the SemanticDB output of a previous compilation.
  */
object PlayFrontendHmrc:
  /** A Twirl template: where it was looked for and what was read from it. */
  case class TwirlTemplate(path: Path, contents: String)

  /** One component declared in a template.
    *
    * @param template  the template declaring the component
    * @param component SemanticDB symbol of the component's type
    * @param variable  name under which the component is available in the template
    * @param usages    the pieces of the template's text matched as calls of `variable`
    */
  case class ComponentUsage(template: TwirlTemplate, component: String, variable: String, usages: Seq[String])

  // SemanticDB symbols of the view components of the govukfrontend / hmrcfrontend packages.
  private val ComponentSymbol = """^uk/gov/hmrc/(?:govuk|hmrc)frontend/views/html.*$"""

  private def isComponent(tpe: TypeRef): Boolean =
    tpe.symbol.matches(ComponentSymbol)

  /** Extracts from `template` the text of every match for `variable` immediately followed
    * by `(` (optionally prefixed with `@`), up to the point the expression selects.
    */
  private def findUsagesIn(template: String, variable: String): Seq[String] =
    // Quoted so that regex metacharacters in the name (e.g. `$`) are matched literally.
    val name = java.util.regex.Pattern.quote(variable)
    s"""(?s)@?${name}(?=\\()(?:(?=.*?\\((?!.*?\\1)(.*\\)(?!.*\\2).*))(?=.*?\\)(?!.*?\\2)(.*)).)+?.*?(?=\\1)[^(]*(?=\\2$$)""".r
      .findAllMatchIn(template)
      .map(_.group(0))
      .toList

  // Maps the URI of a compiled template back to the Twirl source under `app/` and reads
  // it. When the source cannot be read, a placeholder text stands in for its contents.
  private def twirlTemplateFor(repo: Path, compiledUri: String): TwirlTemplate =
    val path = repo.resolve(
      compiledUri
        .replaceFirst("^.*/main/", "app/")
        .replaceFirst("/html/", "/")
        .replaceFirst("template\\.scala", "scala.html")
    )
    val contents = Using(Source.fromFile(path.toFile))(_.mkString)
      .getOrElse("unable to read twirl template")
    TwirlTemplate(path, contents)

  /** Collects, for every SemanticDB document found under `repo/target`, the methods whose
    * (return) type is a play-frontend-hmrc component, together with their usages in the
    * corresponding Twirl template.
    */
  def findComponentUsages(repo: Path): Seq[ComponentUsage] =
    // Locator reports documents through a callback, hence the local buffer.
    val usages = collection.mutable.ListBuffer[ComponentUsage]()
    Locator(repo.resolve("target")) { (_, result) =>
      result.documents.foreach { document =>
        val components = document.symbols.collect {
          case SymbolInformation(_, _, METHOD, _, variable, ValueSignature(tpe: TypeRef), _, _, _, _) if isComponent(tpe) =>
            variable -> tpe.symbol
          case SymbolInformation(_, _, METHOD, _, variable, MethodSignature(_, _, returnType: TypeRef), _, _, _, _) if isComponent(returnType) =>
            variable -> returnType.symbol
        }
        if components.nonEmpty then
          // Read the template once per document instead of once per component.
          val template = twirlTemplateFor(repo, document.uri)
          usages ++= components.map { (variable, component) =>
            ComponentUsage(template, component, variable, findUsagesIn(template.contents, variable))
          }
      }
    }
    usages.toSeq
/** Usages of play-frontend-hmrc components in a repository. Depends on the repository
  * having been compiled with SemanticDB first.
  */
given Metric[Repository, Seq[PlayFrontendHmrc.ComponentUsage]] = Metric: repo =>
  for
    _ <- repo.metric[CompiledWithSemanticDB] // todo return empty seq or make it Either of error or seq of usages
    _ <- IO.println("🔎Searching for usages of play-frontend-hmrc components")
    // The scan reads files from disk, so keep it off the compute pool.
    usages <- IO.blocking(PlayFrontendHmrc.findComponentUsages(repo.path))
    _ <- IO.println(s"🧠Found ${usages.length} usages of our components in $repo")
  yield usages
/** A number of lines. */
case class LineCount(count: Int)

/** Placeholder line count of a repository: announces itself, sleeps for five seconds and
  * reports a fixed count of 42.
  */
given Metric[Repository, LineCount] = Metric: repo =>
  for
    _ <- IO.println(s"👨‍💻Computing line count for $repo")
    _ <- IO.sleep(5.second)
  yield LineCount(42)
/** Line count of a team: the sum of the line counts of its repositories, which are
  * computed in parallel.
  */
given Metric[Team, LineCount] = Metric: team =>
  val teamRepos = summon[Repos].getReposByTeam(team.id)
  teamRepos
    .parTraverse(repo => repo.metric[LineCount])
    .map(counts => LineCount(counts.map(_.count).sum))
// would be cool to have an example of a GithubClient that had some permits
// and was rate limited in maybe a few different ways, maybe with a circuit
// breaker too just to totally over-engineer it for learning.
/** A number of likes on GitHub. */
case class GithubLikes(count: Int)

/** Likes of a repository, looked up in the `hmrc` organisation under the repository's id.
  * The five second sleep afterwards is there to make early cancellation observable.
  */
given Metric[Repository, GithubLikes] = Metric: repo =>
  import Github.*
  val target = Repo(OrgName("hmrc"), RepoName(repo.id.value))
  val linger =
    IO.sleep(5.second).onCancel(IO.println("Cleaning github context after early cancellation"))
  githubClient.getLikes(target) <* linger
extension [E](entity: E)
  /** Computes metric `A` for this entity using the given [[Metric]] instance for it. */
  def metric[A](using metric: Metric[E, A], deps: Dependencies): IO[A] =
    metric.compute(entity)(using deps)
extension (usages: Seq[PlayFrontendHmrc.ComponentUsage])
  /** The same usages with the contents of every template replaced by a short marker,
    * which keeps printed output readable.
    */
  def omittingTemplateContents: Seq[PlayFrontendHmrc.ComponentUsage] =
    def redact(usage: PlayFrontendHmrc.ComponentUsage): PlayFrontendHmrc.ComponentUsage =
      val withoutContents = usage.template.copy(contents = "<<OMITTED>>")
      usage.copy(template = withoutContents)
    for usage <- usages yield redact(usage)
/** Entry point: collects a handful of metrics for one repository and its team and prints
  * them.
  */
object Metrics extends IOApp.Simple:
  // Lifts a lookup result into IO, failing with a descriptive error instead of the bare
  // NoSuchElementException that `Option.get` would throw.
  private def required[A](description: String)(found: Option[A]): IO[A] =
    IO.fromOption(found)(new NoSuchElementException(s"$description not found"))

  def run: IO[Unit] = usingResourceManager:
    given deps: Dependencies = new Dependencies {} // or new TestDependencies {}

    def team(id: TeamId): IO[Team] =
      required(s"team ${id.value}")(summon[Teams].getTeamById(id))

    for
      repo <- required("repository contact-frontend")(repos.getRepoById(RepoId("contact-frontend")))
      _ <- IO.println("Hello toolkit!")
      _ <- IO.println("Starting to collect metrics")
      // in sequence:
      // lines <- repo.metric[LineCount]
      // likes <- repo.metric[GithubLikes]
      // in parallel:
      metrics <- (
        repo.metric[LineCount],
        repo.metric[GithubLikes],
        repo.metric[Seq[PlayFrontendHmrc.ComponentUsage]]
      ).parMapN((_, _, _))
      (lines, likes, componentUsages) = metrics
      teamLineCount <- team(repo.teamId).flatMap(_.metric[LineCount])
      _ <- IO.println(s"Line count: $lines")
      _ <- IO.println(s"Github likes: $likes")
      _ <- IO.println(s"Team line count: $teamLineCount")
      _ <- IO(pprint.pprintln(componentUsages.omittingTemplateContents))
      // asked for a second time, so this one is answered from the metric's cache
      _ <- repo.metric[LineCount]
      _ <- IO.sleep(5.seconds)
      platformTotal <- (
        team(TeamId("platui")).flatMap(_.metric[LineCount]),
        team(TeamId("platops")).flatMap(_.metric[LineCount])
      ).parMapN(_.count + _.count)
      _ <- IO.println(s"🎉All-together across all teams we wrote $platformTotal lines 👏")
      // showing how you can use that little helper anywhere we've given our deps:
      _ <- deps.httpClient.get("http://example.com") <* IO.println("📩Posted results somewhere")
      _ <- IO.println("Finished collecting metrics")
    yield ()
@oscarduignan
Copy link
Author

The current version is at https://gist.github.com/oscarduignan/9b7b86971ff87ac3829377267bbde2b2. I'm just hacking on this in my own time, out of curiosity.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment