hadoop version
hadoop fs -mkdir path
hadoop fs -ls path
hadoop fs -put source path
hadoop fs -cat file.txt
hadoop fs -touchz file.txt
hadoop fsck path
hadoop fs -df path
hadoop fs
echo "text" > file.txt
hadoop fs -moveFromLocal src dest
hadoop fs -rm path
hadoop fs -cp src dest
hadoop fs -mv src dest
hadoop fs -setrep -w 2 file.txt
hadoop fs -expunge
docker run -d --name namenode hadoop-namenode
docker run -d --name datanode hadoop-datanode
docker exec -it namenode bash
cd /root
docker cp input.txt namenode:/root
hadoop fs -put input.txt /input
hadoop jar wordcounter.jar /input /output
hadoop fs -ls /output
hadoop fs -cat /output/part-00000
# syntax=docker/dockerfile:1
# Image that runs the max-temperature PySpark job via spark-submit.
# Pin the base tag instead of :latest for reproducible builds (hadolint DL3007).
FROM bitnami/spark:3.5

# Absolute working directory for the job; created automatically if missing.
WORKDIR /app

# Copy the job script and its input data in a single layer.
COPY max_temperature.py temperature_data.csv ./

# Exec-form CMD so spark-submit runs as PID 1 and receives stop signals.
CMD ["spark-submit", "max_temperature.py"]
docker build -t spark-max-temp .
docker run -it spark-max-temp
# Find the year with the maximum recorded temperature using Spark.
# NOTE: as originally written, the bodies of the if/else branches were not
# indented, which is a Python syntax error; fixed here.
from pyspark.sql import SparkSession, Row

# Build (or reuse) a SparkSession for this job.
spark = SparkSession.builder \
    .appName("Max Temperature with Year") \
    .getOrCreate()

# Load the CSV; header=True takes column names from the first row,
# inferSchema=True lets Spark detect numeric columns (e.g. temperature).
data = spark.read.csv("temperature_data.csv", header=True, inferSchema=True)

# Row with the highest temperature; first() returns None on an empty dataset,
# so the truthiness check below also guards the empty-file case.
max_temp_row = data.orderBy(data["temperature"].desc()).first()

if max_temp_row:
    # Wrap the single result row in a DataFrame so it can be displayed.
    max_temp_df = spark.createDataFrame([
        Row(year=max_temp_row["year"], max_temperature=max_temp_row["temperature"])
    ])
    max_temp_df.show()
else:
    print("No temperature data available")

# Release cluster resources held by this session.
spark.stop()
docker exec -it hive-hive-server hive