spark问题记录
舟率率 11/17/2023 spark
# 用spark本地写s3时报错,PartialGroupNameException Does not support partial group name resolution on Windows
解决方法
注释掉 System.setProperty("HADOOP_USER_NAME", "hdfs");只有在连接 HDFS 时才需要这个配置。
# 用spark本地写s3时报错,java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
解决方法
重写NativeIO
# spark报错:org.apache.thrift.TApplicationException: Required field 'filesAdded' is unset! Struct:InsertEventRequestData(filesAdded:null)
解决方法
不知道为什么,在代码和 hive-site.xml 中设置下面的配置后不再报错,但通过 spark 提交命令传入该配置却无法生效:
hive.metadata.dml.events=false
# spark写s3报错:java.lang.UnsupportedOperationException: S3AFileSystem doesn't support getAclStatus
报错图片
原因
S3AFileSystem 类没有实现 getAclStatus 方法。
解决方法
在S3AFileSystem类加入以下代码
/**
 * Translates S3 grants into Hadoop ACL entries; needed when building an ACL status.
 *
 * <p>Every grant yields exactly one entry with {@code ACCESS} scope and {@code USER} type.
 * The entry is named after the grantee's identifier and carries the permission returned by
 * {@code convertPermission} for the grant's permission string.
 *
 * @param aclGrants the S3 grants to translate
 * @return a newly created list holding one entry per grant, in the list's iteration order
 */
private static List<AclEntry> convertAclGrants(List<Grant> aclGrants) {
    final List<AclEntry> converted = new ArrayList<>();
    aclGrants.forEach(s3Grant -> {
        // Resolve the grantee first, then the permission, mirroring the order of lookups
        // so that any failure surfaces from the same call as before.
        final String granteeName = s3Grant.getGrantee().getIdentifier();
        final FsAction action = convertPermission(s3Grant.getPermission().toString());
        final AclEntry.Builder entryBuilder = new AclEntry.Builder();
        entryBuilder.setPermission(action);
        entryBuilder.setScope(AclEntryScope.ACCESS);
        entryBuilder.setType(AclEntryType.USER);
        entryBuilder.setName(granteeName);
        converted.add(entryBuilder.build());
    });
    return converted;
}
/**
 * Maps an S3 permission string to a Hadoop {@code FsAction}; needed when building an ACL status.
 *
 * <p>Both spellings of each permission are accepted: the enum-constant form
 * ({@code FullControl}, {@code Write}, {@code Read}) and the upper-case form
 * ({@code FULL_CONTROL}, {@code WRITE}, {@code READ}). The upper-case form matters because the
 * caller passes {@code Permission.toString()}, which in the AWS SDK for Java 1.x returns the
 * upper-case string; without those labels read and write grants would be reported as
 * {@code FsAction.NONE}.
 *
 * @param permission the S3 permission string; may be {@code null}
 * @return {@code FsAction.ALL} for full control, {@code FsAction.WRITE} for write,
 *         {@code FsAction.READ} for read, and {@code FsAction.NONE} for {@code null} or any
 *         other value
 */
private static FsAction convertPermission(String permission) {
    // A switch on a null String throws NullPointerException; treat "no permission" as NONE.
    if (permission == null) {
        return FsAction.NONE;
    }
    switch (permission) {
        case "FullControl":
        case "FULL_CONTROL":
            return FsAction.ALL;
        case "Write":
        case "WRITE":
            return FsAction.WRITE;
        case "Read":
        case "READ":
            return FsAction.READ;
        default:
            // Anything unrecognised has no direct FsAction equivalent here.
            return FsAction.NONE;
    }
}
/**
 * Hand-written implementation that replaces the default behaviour of failing with
 * {@code java.lang.UnsupportedOperationException: S3AFileSystem doesn't support getAclStatus}.
 *
 * <p>Requests the ACL of this filesystem's bucket and re-expresses it as a Hadoop
 * {@code AclStatus}. Note that {@code path} is not consulted: the bucket-level ACL is what is
 * returned regardless of the path supplied.
 *
 * @param path the path whose ACL status is requested (currently unused)
 * @return a status whose owner is the ACL owner's id, whose group is the empty string, and
 *         whose entries are the converted bucket grants
 * @throws IOException declared by the overridden method signature
 */
@Override
public AclStatus getAclStatus(Path path) throws IOException {
    final AccessControlList bucketAcl = this.s3.getBucketAcl(new GetBucketAclRequest(this.bucket));

    // Re-express the Amazon S3 ACL as a Hadoop AclStatus.
    final AclStatus.Builder statusBuilder = new AclStatus.Builder();
    statusBuilder.owner(bucketAcl.getOwner().getId());
    statusBuilder.group("");
    statusBuilder.addEntries(convertAclGrants(bucketAcl.getGrantsAsList()));
    return statusBuilder.build();
}
/**
 * Initialises the {@code cannedACL} field from configuration.
 *
 * <p>Reads {@code fs.s3a.acl.default}, falling back to the name of
 * {@code CannedAccessControlList.BucketOwnerFullControl} when the key is absent. An empty
 * value clears the field to {@code null}; any other value is resolved with
 * {@code CannedAccessControlList.valueOf}.
 *
 * @param conf the configuration to read the canned ACL name from
 */
private void initCannedAcls(Configuration conf) {
    final String configuredName =
        conf.get("fs.s3a.acl.default", CannedAccessControlList.BucketOwnerFullControl.name());
    // An explicitly empty setting means "no canned ACL".
    this.cannedACL = configuredName.isEmpty()
        ? null
        : CannedAccessControlList.valueOf(configuredName);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72